コード例 #1
0
ファイル: test_illumina.py プロジェクト: Galithil/scilifelab
    def setUp(self):
        """Build a temporary flowcell directory tree and wrap it in an IlluminaRun.

        Layout created below a fresh temporary root directory:

        * one flowcell directory named after a generated run id,
        * three sequence read directories ("Unaligned", "Unaligned_L6",
          "Unaligned_L8"),
        * an "Undetermined_indices" directory in all but the last of them,
        * a "Basecall_Stats_<fcid>" directory in all but the first of them.
        """
        self.rootdir = tempfile.mkdtemp(prefix="test_illumina_run_")

        # Flowcell directory, named after a run id generated for the barcode
        self.exp_fcid = td.generate_fc_barcode()
        run_id = td.generate_run_id(fc_barcode=self.exp_fcid)
        self.exp_fcdir = os.path.join(self.rootdir, run_id)
        os.mkdir(self.exp_fcdir)

        # Sequence read directories
        seqdir_names = ("Unaligned", "Unaligned_L6", "Unaligned_L8")
        self.exp_seqdir = [os.path.join(self.exp_fcdir, name)
                           for name in seqdir_names]
        for seqdir in self.exp_seqdir:
            os.mkdir(seqdir)

        # Undetermined indices directories: every read directory except the last
        self.exp_unmatched_directory = [
            os.path.join(seqdir, "Undetermined_indices")
            for seqdir in self.exp_seqdir[:-1]]
        for unmatched in self.exp_unmatched_directory:
            os.mkdir(unmatched)

        # Basecall stats directories: every read directory except the first
        stats_name = "Basecall_Stats_{}".format(self.exp_fcid)
        self.exp_basecall_stats = [os.path.join(seqdir, stats_name)
                                   for seqdir in self.exp_seqdir[1:]]
        for stats_dir in self.exp_basecall_stats:
            os.mkdir(stats_dir)

        self.run = IlluminaRun(self.exp_fcdir)
コード例 #2
0
ファイル: test_illumina.py プロジェクト: Galithil/scilifelab
 def test_get_samplesheet(self):
     """Locate the samplesheet in a folder

     First asserts that IlluminaRun.get_samplesheet returns None for a
     folder that only contains randomly named files and directories. Then
     creates SampleSheet.csv followed by [FCID].csv in a flowcell directory
     and asserts, after each file is created, that the newly created file
     is the one returned.

     The scratch subdirectory is removed in a finally clause so that a
     failing assertion does not leave it behind.
     """
     
     # Work in a separate subdirectory
     subdir = os.path.join(self.rootdir,"test_get_samplesheet")
     os.mkdir(subdir)
     
     try:
         # Create a few random files and folders and assert that they are not returned
         suffixes = [".csv","",""]
         for suffix in suffixes:
             os.mkdir(os.path.join(subdir,''.join(random.choice(string.ascii_uppercase) for x in range(5))))
             fh, _ = tempfile.mkstemp(dir=subdir, suffix=suffix)
             # Only the file's existence matters, so release the descriptor right away
             os.close(fh)
             
         self.assertIsNone(IlluminaRun.get_samplesheet(subdir),
                           "Getting non-existing samplesheet did not return None")
         
         # Create a SampleSheet.csv and a [FCID].csv file and assert that they are
         # returned with a preference for the [FCID].csv file
         fcid = td.generate_fc_barcode()
         fcdir = os.path.join(subdir,td.generate_run_id(fc_barcode=fcid))
         os.mkdir(fcdir)
         
         ss = [os.path.join(fcdir,"SampleSheet.csv"),
               os.path.join(fcdir,"{}.csv".format(fcid))]
         for s in ss:
             utils.touch_file(s)
             # Checked after each touch: SampleSheet.csv alone first, then
             # [FCID].csv once both files exist
             self.assertEqual(s,IlluminaRun.get_samplesheet(fcdir),
                              "Did not get existing {}".format(os.path.basename(s)))
     finally:
         # Clean up even when an assertion above fails
         shutil.rmtree(subdir)
コード例 #3
0
ファイル: test_illumina.py プロジェクト: wenjingk/scilifelab
    def setUp(self):
        """Create a temporary flowcell folder structure and an IlluminaRun on it.

        Below a fresh temporary root, a flowcell directory is created with
        three sequence read directories. All but the last read directory get
        an "Undetermined_indices" subdirectory, and all but the first get a
        "Basecall_Stats_<fcid>" subdirectory.
        """
        self.rootdir = tempfile.mkdtemp(prefix="test_illumina_run_")

        # Flowcell directory
        self.exp_fcid = td.generate_fc_barcode()
        fc_name = td.generate_run_id(fc_barcode=self.exp_fcid)
        self.exp_fcdir = os.path.join(self.rootdir, fc_name)
        os.mkdir(self.exp_fcdir)

        # Sequence read directories
        self.exp_seqdir = [
            os.path.join(self.exp_fcdir, leaf)
            for leaf in ("Unaligned", "Unaligned_L6", "Unaligned_L8")
        ]
        for read_dir in self.exp_seqdir:
            os.mkdir(read_dir)

        # Directories for undetermined indices reads (last read dir excluded)
        with_unmatched = self.exp_seqdir[:-1]
        self.exp_unmatched_directory = [
            os.path.join(read_dir, "Undetermined_indices")
            for read_dir in with_unmatched
        ]
        for unmatched_dir in self.exp_unmatched_directory:
            os.mkdir(unmatched_dir)

        # Basecall stats directories (first read dir excluded)
        with_stats = self.exp_seqdir[1:]
        stats_leaf = "Basecall_Stats_{}".format(self.exp_fcid)
        self.exp_basecall_stats = [
            os.path.join(read_dir, stats_leaf) for read_dir in with_stats
        ]
        for stats_dir in self.exp_basecall_stats:
            os.mkdir(stats_dir)

        self.run = IlluminaRun(self.exp_fcdir)
コード例 #4
0
 def test_get_samplesheet(self):
     """Check sq.get_samplesheet on folders without and with samplesheets

     A folder holding only randomly named files and directories must give
     None. In a flowcell directory, SampleSheet.csv and then [FCID].csv are
     created, and after each creation the newly created file must be the
     one returned.
     """
     # Populate the root directory with decoys: one random directory and one
     # temporary file per suffix
     for suffix in (".csv", "", ""):
         decoy_name = ''.join(random.choice(string.ascii_uppercase) for _ in range(5))
         os.mkdir(os.path.join(self.rootdir, decoy_name))
         handle, _path = tempfile.mkstemp(dir=self.rootdir, suffix=suffix)
         os.close(handle)

     found = sq.get_samplesheet(self.rootdir)
     self.assertIsNone(found,
                       "Getting non-existing samplesheet did not return None")

     # Flowcell directory that will hold the real samplesheets
     fcid = td.generate_fc_barcode()
     run_id = td.generate_run_id(fc_barcode=fcid)
     fcdir = os.path.join(self.rootdir, run_id)
     os.mkdir(fcdir)

     # SampleSheet.csv is created first, [FCID].csv second; the file created
     # last is expected to be preferred
     for filename in ("SampleSheet.csv", "{}.csv".format(fcid)):
         expected = os.path.join(fcdir, filename)
         utils.touch_file(expected)
         message = "Did not get existing {}".format(os.path.basename(expected))
         self.assertEqual(expected, sq.get_samplesheet(fcdir), message)
コード例 #5
0
 def setUp(self):
     """Write a pair of test fastq files and a matching samplesheet.

     1000 generated records are written, each tagged with a barcode chosen
     at random from four generated barcodes. The first element of every
     record is stored under its barcode in self.indexes. The samplesheet
     lists all but the last barcode, plus one extra row on a different lane
     that is not expected to match any read.

     Sets self.rootdir, self.samplesheet, self.indexes, self.fastq_1 and
     self.fastq_2.
     """
     self.rootdir = tempfile.mkdtemp(prefix="test_fastq_utils_")
     
     # Generate some test fastq_files
     indexes = {td.generate_barcode(): [],
                td.generate_barcode(): [],
                td.generate_barcode(): [],
                td.generate_barcode(): []}
     # Materialize the keys once: on Python 3 dict views can neither be passed
     # to random.choice nor sliced, and the key set never changes below
     barcodes = list(indexes)
     args = {'instrument': td.generate_instrument(),
             'run_number': random.randint(101,9999),
             'fcid': td.generate_fc_barcode(),
             'lane': 1,
             'pair': True}
     fd, f1 = tempfile.mkstemp(suffix="_R1.fastq.gz", dir=self.rootdir)
     # Only the unique file name is needed; the writer below reopens the path
     os.close(fd) 
     f2 = f1.replace("_R1","_R2")
     f1h = fu.FastQWriter(f1)
     f2h = fu.FastQWriter(f2)
     for _ in range(1000):
         args['index'] = random.choice(barcodes)
         record = td.generate_fastq_record(**args)
         indexes[args['index']].append(record[0])
         # First four elements go to the R1 file, the remainder to the R2 file
         f1h.write(record[0:4])
         f2h.write(record[4:])
     f1h.close()
     f2h.close()
     
     # Create a samplesheet to use for demultiplexing using all but the last index
     samplesheet = f1.replace("_R1.fastq.gz",".csv")
     sdata = []
     for n, index in enumerate(barcodes[0:-1]):
         sdata.append([args['fcid'],
                       str(args['lane']),
                       "Sample_{}".format(str(n)),
                       "unknown",
                       index,
                       "DemuxTest",
                       "0",
                       "",
                       "",
                       "DemuxTestProject"])
          
     # Add an entry that will not match
     s = copy.copy(sdata[-1])
     s[1] = str(args['lane']+1)
     sdata.append(s)
     
     self.samplesheet = td._write_samplesheet(sdata,samplesheet)
     self.indexes = indexes
     self.fastq_1 = f1
     self.fastq_2 = f2
コード例 #6
0
    def setUp(self):
        """Write a pair of test fastq files and a matching samplesheet.

        1000 generated records are written, each tagged with a barcode chosen
        at random from four generated barcodes. The first element of every
        record is stored under its barcode in self.indexes. The samplesheet
        lists all but the last barcode, plus one extra row on a different
        lane that is not expected to match any read.

        Sets self.rootdir, self.samplesheet, self.indexes, self.fastq_1 and
        self.fastq_2.
        """
        self.rootdir = tempfile.mkdtemp(prefix="test_fastq_utils_")

        # Generate some test fastq_files
        indexes = {
            td.generate_barcode(): [],
            td.generate_barcode(): [],
            td.generate_barcode(): [],
            td.generate_barcode(): []
        }
        # Materialize the keys once: on Python 3 dict views can neither be
        # passed to random.choice nor sliced, and the key set never changes
        barcodes = list(indexes)
        args = {
            'instrument': td.generate_instrument(),
            'run_number': random.randint(101, 9999),
            'fcid': td.generate_fc_barcode(),
            'lane': 1,
            'pair': True
        }
        fd, f1 = tempfile.mkstemp(suffix="_R1.fastq.gz", dir=self.rootdir)
        # Only the unique file name is needed; the writer reopens the path
        os.close(fd)
        f2 = f1.replace("_R1", "_R2")
        f1h = fu.FastQWriter(f1)
        f2h = fu.FastQWriter(f2)
        for _ in range(1000):
            args['index'] = random.choice(barcodes)
            record = td.generate_fastq_record(**args)
            indexes[args['index']].append(record[0])
            # First four elements go to the R1 file, the rest to the R2 file
            f1h.write(record[0:4])
            f2h.write(record[4:])
        f1h.close()
        f2h.close()

        # Create a samplesheet to use for demultiplexing using all but the last index
        samplesheet = f1.replace("_R1.fastq.gz", ".csv")
        sdata = []
        for n, index in enumerate(barcodes[0:-1]):
            sdata.append([
                args['fcid'],
                str(args['lane']), "Sample_{}".format(str(n)), "unknown",
                index, "DemuxTest", "0", "", "", "DemuxTestProject"
            ])

        # Add an entry that will not match
        s = copy.copy(sdata[-1])
        s[1] = str(args['lane'] + 1)
        sdata.append(s)

        self.samplesheet = td._write_samplesheet(sdata, samplesheet)
        self.indexes = indexes
        self.fastq_1 = f1
        self.fastq_2 = f2