def setUp(self):
    """Set up a test fastq file.

    Creates a temporary directory (``self.rootdir``) containing one file
    with a ``.fastq.gz`` suffix. Barcodes of length 6 are generated 100
    times, each paired with a record count from ``random.randint(10, 100)``.
    For every distinct barcode that many records are written, with the
    barcode followed by a literal ``A`` appended to element 1 of the record.

    Sets ``self.sequence_length``, ``self.barcode_length``,
    ``self.barcodes`` (barcode -> number of records written) and
    ``self.fastq_file`` (path of the generated file).
    """
    self.rootdir = tempfile.mkdtemp(prefix="test_extract_pre_casava_index_")

    # Set up a fastq file. Only the path is used below (the writer is
    # handed the path), so the descriptor from mkstemp is closed right away.
    fd, fqfile = tempfile.mkstemp(dir=self.rootdir, suffix=".fastq.gz")
    os.close(fd)
    fqw = fu.FastQWriter(fqfile)

    try:
        # Generate a set of indexes. All barcodes are drawn before any of
        # the counts; repeated barcodes collapse into a single dict key, so
        # the dict may hold fewer than 100 entries.
        seqlen = 101
        bclen = 6
        generated = [td.generate_barcode(bclen) for _ in range(100)]
        sizes = [random.randint(10, 100) for _ in range(100)]
        barcodes = dict(zip(generated, sizes))

        # Generate fastq records and append the barcode
        for barcode, nseqs in barcodes.items():
            for _ in range(nseqs):
                rec = td.generate_fastq_record(index=barcode,
                                               sequence_length=seqlen)
                # presumably element 1 is the sequence line -- TODO confirm
                # against td.generate_fastq_record
                rec[1] = "{}{}A".format(rec[1], barcode)
                fqw.write(rec)
    finally:
        # Release the writer even if generating or writing a record fails.
        fqw.close()

    self.sequence_length = seqlen
    self.barcode_length = bclen
    self.barcodes = barcodes
    self.fastq_file = fqfile
def setUp(self):
    """Set up a test fastq file.

    A temporary directory is created and stored in ``self.rootdir``; a
    single file with a ``.fastq.gz`` suffix is created inside it. One
    hundred barcodes of length 6 are generated and each is paired with a
    count from ``random.randint(10, 100)``. For each distinct barcode, that
    many records are written; element 1 of every record gets the barcode
    and a trailing ``A`` appended before it is written.

    Afterwards ``self.sequence_length``, ``self.barcode_length``,
    ``self.barcodes`` (barcode -> record count) and ``self.fastq_file``
    (path to the written file) are available to the tests.
    """
    self.rootdir = tempfile.mkdtemp(prefix="test_extract_pre_casava_index_")

    # Set up a fastq file; the writer receives the path, so the low-level
    # descriptor returned by mkstemp is not needed.
    fd, fqfile = tempfile.mkstemp(dir=self.rootdir, suffix=".fastq.gz")
    os.close(fd)
    fqw = fu.FastQWriter(fqfile)

    try:
        # Generate a set of indexes (barcodes first, then counts). A barcode
        # generated more than once ends up as one key in the mapping.
        seqlen = 101
        bclen = 6
        keys = [td.generate_barcode(bclen) for _ in range(100)]
        values = [random.randint(10, 100) for _ in range(100)]
        barcodes = dict(zip(keys, values))

        # Generate fastq records and append the barcode
        for barcode, nseqs in barcodes.items():
            for _ in range(nseqs):
                rec = td.generate_fastq_record(index=barcode,
                                               sequence_length=seqlen)
                # NOTE(review): element 1 looks like the sequence line;
                # verify against td.generate_fastq_record.
                rec[1] = "{}{}A".format(rec[1], barcode)
                fqw.write(rec)
    finally:
        # Always close the writer so a failure above does not leak it.
        fqw.close()

    self.sequence_length = seqlen
    self.barcode_length = bclen
    self.barcodes = barcodes
    self.fastq_file = fqfile
def setUp(self):
    """Create a pair of fastq files and a samplesheet for the tests.

    Inside a fresh temporary directory (``self.rootdir``) this writes 1000
    generated paired records: elements ``[0:4]`` of each record go to the
    ``_R1`` file and the remaining elements to the ``_R2`` file. Each record
    uses a barcode picked at random from a small generated set.

    A samplesheet is written next to the fastq files. It has one row for
    every barcode except the last one, plus a copy of the final row with
    the lane number increased by one.

    Sets ``self.samplesheet`` (return value of ``td._write_samplesheet``),
    ``self.indexes`` (barcode -> list of ``record[0]`` values written for
    it), ``self.fastq_1`` and ``self.fastq_2``.
    """
    self.rootdir = tempfile.mkdtemp(prefix="test_fastq_utils_")

    # Generate some test fastq_files. Four barcodes are requested; identical
    # ones would collapse into a single key.
    indexes = {td.generate_barcode(): [],
               td.generate_barcode(): [],
               td.generate_barcode(): [],
               td.generate_barcode(): []}
    args = {'instrument': td.generate_instrument(),
            'run_number': random.randint(101, 9999),
            'fcid': td.generate_fc_barcode(),
            'lane': 1,
            'pair': True}

    r1_suffix = "_R1.fastq.gz"
    fd, f1 = tempfile.mkstemp(suffix=r1_suffix, dir=self.rootdir)
    os.close(fd)
    # Derive sibling paths from the part of the name before the suffix.
    # A plain str.replace("_R1", "_R2") would also rewrite a "_R1" that
    # happens to occur in the random part of the directory or file name.
    stem = f1[:-len(r1_suffix)]
    f2 = stem + "_R2.fastq.gz"

    # Snapshot the keys once: indexable on both Python 2 and 3, and the
    # same order is reused for the samplesheet below.
    index_names = list(indexes)

    f1h = fu.FastQWriter(f1)
    try:
        f2h = fu.FastQWriter(f2)
        try:
            for _ in range(1000):
                args['index'] = random.choice(index_names)
                record = td.generate_fastq_record(**args)
                # presumably record[0] is the read-1 header -- TODO confirm
                indexes[args['index']].append(record[0])
                f1h.write(record[0:4])
                f2h.write(record[4:])
        finally:
            f2h.close()
    finally:
        # Close both writers even if generating or writing a record fails.
        f1h.close()

    # Create a samplesheet to use for demultiplexing using all but the last index
    samplesheet = stem + ".csv"
    sdata = []
    for n, index in enumerate(index_names[0:-1]):
        sdata.append([args['fcid'],
                      str(args['lane']),
                      "Sample_{}".format(str(n)),
                      "unknown",
                      index,
                      "DemuxTest",
                      "0",
                      "",
                      "",
                      "DemuxTestProject"])

    # Add an entry that will not match
    s = copy.copy(sdata[-1])
    s[1] = str(args['lane'] + 1)
    sdata.append(s)

    self.samplesheet = td._write_samplesheet(sdata, samplesheet)
    self.indexes = indexes
    self.fastq_1 = f1
    self.fastq_2 = f2
def setUp(self):
    """Build paired fastq files plus a matching samplesheet.

    A temporary directory is created (``self.rootdir``). 1000 generated
    paired records are written there, with ``record[0:4]`` going to the
    ``_R1`` file and ``record[4:]`` to the ``_R2`` file. The barcode of each
    record is chosen at random from a small generated set.

    The samplesheet contains a row for each barcode except the last, and a
    final extra row that duplicates the previous one with the lane number
    incremented.

    Sets ``self.samplesheet`` (whatever ``td._write_samplesheet`` returns),
    ``self.indexes`` (barcode -> list of ``record[0]`` values),
    ``self.fastq_1`` and ``self.fastq_2``.
    """
    self.rootdir = tempfile.mkdtemp(prefix="test_fastq_utils_")

    # Generate some test fastq_files. Duplicate barcodes, if any, share one
    # key, so there can be fewer than four entries.
    indexes = {
        td.generate_barcode(): [],
        td.generate_barcode(): [],
        td.generate_barcode(): [],
        td.generate_barcode(): [],
    }
    args = {
        'instrument': td.generate_instrument(),
        'run_number': random.randint(101, 9999),
        'fcid': td.generate_fc_barcode(),
        'lane': 1,
        'pair': True,
    }

    read1_suffix = "_R1.fastq.gz"
    fd, f1 = tempfile.mkstemp(suffix=read1_suffix, dir=self.rootdir)
    os.close(fd)
    # Only the trailing suffix is swapped. Replacing every "_R1" in the
    # path could corrupt it when the randomly generated directory or file
    # name happens to contain that substring.
    base = f1[:-len(read1_suffix)]
    f2 = base + "_R2.fastq.gz"

    # dict.keys() is not indexable on Python 3; take one list copy of the
    # keys and reuse it for both the random choice and the samplesheet.
    barcode_list = list(indexes)

    f1h = fu.FastQWriter(f1)
    try:
        f2h = fu.FastQWriter(f2)
        try:
            for _ in range(1000):
                args['index'] = random.choice(barcode_list)
                record = td.generate_fastq_record(**args)
                # NOTE(review): record[0] is assumed to be the read-1
                # header line -- confirm against td.generate_fastq_record.
                indexes[args['index']].append(record[0])
                f1h.write(record[0:4])
                f2h.write(record[4:])
        finally:
            f2h.close()
    finally:
        # Both writers are released even when the loop above raises.
        f1h.close()

    # Create a samplesheet to use for demultiplexing using all but the last index
    samplesheet = base + ".csv"
    sdata = []
    for n, index in enumerate(barcode_list[0:-1]):
        sdata.append([
            args['fcid'],
            str(args['lane']),
            "Sample_{}".format(str(n)),
            "unknown",
            index,
            "DemuxTest",
            "0",
            "",
            "",
            "DemuxTestProject",
        ])

    # Add an entry that will not match
    s = copy.copy(sdata[-1])
    s[1] = str(args['lane'] + 1)
    sdata.append(s)

    self.samplesheet = td._write_samplesheet(sdata, samplesheet)
    self.indexes = indexes
    self.fastq_1 = f1
    self.fastq_2 = f2
def setUp(self):
    """Write a fastq file with a known number of records per lane/index.

    Creates a temporary directory (``self.rootdir``) and one file with a
    ``.fastq.gz`` suffix. For each lane 1..8 and each of four generated
    barcodes, ``random.randint(10, 99)`` records are written.

    Sets ``self.example_fq`` (path of the file) and ``self.example_counts``
    (``counts[lane][barcode]`` -> number of records written).
    """
    self.rootdir = tempfile.mkdtemp(prefix="test_FastQParser_")

    # Create some fastq records with different properties
    lanes = range(1, 9)
    indexes = [td.generate_barcode() for _ in range(4)]
    fd, fqfile = tempfile.mkstemp(suffix=".fastq.gz", dir=self.rootdir)
    # The writer is given the path, so the raw descriptor is not needed.
    os.close(fd)

    # For each lane and index, create a random number of records
    counts = {}
    fqw = fu.FastQWriter(fqfile)
    try:
        for lane in lanes:
            counts[lane] = {}
            for ix in indexes:
                no = random.randint(10, 99)
                # Accumulate rather than overwrite: `indexes` is a list, so
                # the same barcode may occur twice, and the recorded count
                # must still equal the number of records written for it.
                counts[lane][ix] = counts[lane].get(ix, 0) + no
                for _ in range(no):
                    fqw.write(td.generate_fastq_record(lane=lane, index=ix))
    finally:
        # Close the writer even if generating or writing a record fails.
        fqw.close()

    self.example_fq = fqfile
    self.example_counts = counts
def setUp(self):
    """Generate a fastq file and remember how many records it holds.

    A temporary directory is created (``self.rootdir``) with a single file
    carrying a ``.fastq.gz`` suffix. Four barcodes are generated; for every
    lane from 1 to 8 and every barcode, a number of records drawn from
    ``random.randint(10, 99)`` is written to the file.

    Sets ``self.example_fq`` to the file path and ``self.example_counts``
    to a nested dict mapping lane -> barcode -> records written.
    """
    self.rootdir = tempfile.mkdtemp(prefix="test_FastQParser_")

    # Create some fastq records with different properties
    lanes = range(1, 9)
    indexes = [td.generate_barcode() for _ in range(4)]
    fd, fqfile = tempfile.mkstemp(suffix=".fastq.gz", dir=self.rootdir)
    # Only the path is handed to the writer; drop the open descriptor.
    os.close(fd)

    # For each lane and index, create a random number of records
    counts = {}
    fqw = fu.FastQWriter(fqfile)
    try:
        for lane_no in lanes:
            per_lane = counts[lane_no] = {}
            for ix in indexes:
                no = random.randint(10, 99)
                # A barcode can appear more than once in `indexes`; add to
                # any earlier total so the count matches the file contents
                # instead of keeping only the last batch size.
                per_lane[ix] = per_lane.get(ix, 0) + no
                for _ in range(no):
                    fqw.write(
                        td.generate_fastq_record(lane=lane_no, index=ix))
    finally:
        # Make sure the writer is released if anything above raises.
        fqw.close()

    self.example_fq = fqfile
    self.example_counts = counts