def test_import_project(self):
     """import_project: check project is imported

     The 'NewProj' project must be absent before the import, and
     afterwards be listed by both the AutoProcess instance used for
     the import and a fresh instance made from the same directory.
     """
     # Make an auto-process directory
     mockdir = MockAnalysisDirFactory.bcl2fastq2(
         '160621_M00879_0087_000000000-AGEW9', 'miseq', top_dir=self.dirn)
     mockdir.create()
     # Check that the project is not currently present
     # (assertNotIn/assertIn show the actual project names on failure)
     ap = AutoProcess(mockdir.dirn)
     self.assertNotIn(
         'NewProj', [p.name for p in ap.get_analysis_projects()])
     self.assertNotIn(
         'NewProj',
         [p.name for p in ap.get_analysis_projects_from_dirs()])
     self.assertFalse(
         os.path.exists(os.path.join(ap.analysis_dir, 'NewProj')))
     # Import the project
     import_project(ap, self.new_project_dir)
     self.assertIn(
         'NewProj', [p.name for p in ap.get_analysis_projects()])
     self.assertIn(
         'NewProj',
         [p.name for p in ap.get_analysis_projects_from_dirs()])
     self.assertTrue(
         os.path.exists(os.path.join(ap.analysis_dir, 'NewProj')))
     # Verify via fresh AutoProcess object
     ap2 = AutoProcess(mockdir.dirn)
     self.assertIn(
         'NewProj', [p.name for p in ap2.get_analysis_projects()])
     self.assertIn(
         'NewProj',
         [p.name for p in ap2.get_analysis_projects_from_dirs()])
     self.assertTrue(
         os.path.exists(os.path.join(ap2.analysis_dir, 'NewProj')))
Example #2
0
 def test_analysis_dir_path(self):
     """AutoProcess: analysis dir path is absolute and normalized

     Builds AutoProcess instances from absolute, relative and
     unnormalised spellings of the same analysis directory and checks
     that 'analysis_dir' always holds the plain absolute path.
     """
     # Create mock Illumina run directory
     MockIlluminaRun(
         '160621_M00879_0087_000000000-AGEW9', 'miseq',
         top_dir=self.dirn).create()
     # A new AutoProcess instance has no analysis dir set
     self.assertEqual(AutoProcess().analysis_dir, None)
     # Make a mock analysis dir
     analysis = MockAnalysisDirFactory.bcl2fastq2(
         '160621_M00879_0087_000000000-AGEW9', 'miseq', top_dir=self.dirn)
     analysis.create()
     # Different spellings of the path to the analysis dir
     # NOTE(review): the relative forms presumably rely on the test
     # fixture having changed directory to 'self.dirn' - confirm
     rel_path = "160621_M00879_0087_000000000-AGEW9_analysis"
     abs_path = os.path.join(self.dirn, rel_path)
     rel_unnormalised = os.path.join(
         "..", os.path.basename(self.dirn), rel_path)
     abs_unnormalised = os.path.join(self.dirn, rel_unnormalised)
     # Whatever form is supplied, the stored value should be the
     # absolute path
     for supplied in (abs_path, rel_path,
                      abs_unnormalised, rel_unnormalised):
         self.assertEqual(
             AutoProcess(analysis_dir=supplied).analysis_dir, abs_path)
 def test_import_project_with_qc(self):
     """import_project: check project with QC outputs is imported

     QC outputs are added to the project before it is imported;
     after the import the QC report, the MultiQC report and the QC
     ZIP archive must be present in the imported project directory.
     """
     # Make mock multiqc
     MockMultiQC.create(os.path.join(self.bin, "multiqc"))
     os.environ['PATH'] = "%s:%s" % (self.bin, os.environ['PATH'])
     # Make an auto-process directory
     mockdir = MockAnalysisDirFactory.bcl2fastq2(
         '160621_M00879_0087_000000000-AGEW9', 'miseq', top_dir=self.dirn)
     mockdir.create()
     # Add QC outputs to the project to be imported
     UpdateAnalysisProject(AnalysisProject(
         'NewProj',
         self.new_project_dir)).add_qc_outputs(include_multiqc=False)
     # Check that the project is not currently present
     # (assertNotIn/assertIn show the actual project names on failure)
     ap = AutoProcess(mockdir.dirn)
     self.assertNotIn(
         'NewProj', [p.name for p in ap.get_analysis_projects()])
     self.assertNotIn(
         'NewProj',
         [p.name for p in ap.get_analysis_projects_from_dirs()])
     self.assertFalse(
         os.path.exists(os.path.join(ap.analysis_dir, 'NewProj')))
     # Import the project
     import_project(ap, self.new_project_dir)
     self.assertIn(
         'NewProj', [p.name for p in ap.get_analysis_projects()])
     self.assertIn(
         'NewProj',
         [p.name for p in ap.get_analysis_projects_from_dirs()])
     self.assertTrue(
         os.path.exists(os.path.join(ap.analysis_dir, 'NewProj')))
     # Verify via fresh AutoProcess object
     ap2 = AutoProcess(mockdir.dirn)
     self.assertIn(
         'NewProj', [p.name for p in ap2.get_analysis_projects()])
     self.assertIn(
         'NewProj',
         [p.name for p in ap2.get_analysis_projects_from_dirs()])
     self.assertTrue(
         os.path.exists(os.path.join(ap2.analysis_dir, 'NewProj')))
     # Check for QC report and ZIP file
     for f in (
             "qc_report.html",
             "multiqc_report.html",
             "qc_report.NewProj.160621_M00879_0087_000000000-AGEW9.zip",
     ):
         f = os.path.join(ap2.analysis_dir, 'NewProj', f)
         self.assertTrue(os.path.exists(f), "Missing %s" % f)
 def test_make_fastqs_unknown_platform(self):
     """make_fastqs: unknown platform raises exception

     Sets up an analysis for a run named with an 'UNKNOWN' instrument
     and checks that 'make_fastqs' with the standard protocol raises.
     """
     run_name = "171020_UNKNOWN_00002_AHGXXXX"
     # Create mock source data
     MockIlluminaRun(run_name, "miseq", top_dir=self.wd).create()
     # Create mock bcl2fastq and put its directory first on the PATH
     bcl2fastq_exe = os.path.join(self.bin, "bcl2fastq")
     MockBcl2fastq2Exe.create(bcl2fastq_exe)
     os.environ['PATH'] = "%s:%s" % (self.bin, os.environ['PATH'])
     # Set up the analysis and check the initial parameters
     ap = AutoProcess(settings=self.settings)
     ap.setup(os.path.join(self.wd, run_name))
     self.assertTrue(ap.params.sample_sheet is not None)
     self.assertEqual(ap.params.bases_mask, "auto")
     self.assertTrue(ap.params.primary_data_dir is None)
     self.assertFalse(ap.params.acquired_primary_data)
     # Fastq generation should fail
     with self.assertRaises(Exception):
         make_fastqs(ap, protocol="standard")
 def test_make_fastqs_handle_bcl2fastq2_failure(self):
     """make_fastqs: handle bcl2fastq2 failure

     The mock bcl2fastq exits with a non-zero code, so 'make_fastqs'
     is expected to raise. Afterwards the primary data, log and
     'bcl2fastq' subdirectories should exist, but none of the
     statistics, 'projects.info' or processing QC files should.
     """
     # Create mock source data
     illumina_run = MockIlluminaRun("171020_M00879_00002_AHGXXXX",
                                    "miseq",
                                    top_dir=self.wd)
     illumina_run.create()
     # Create mock bcl2fastq which will fail (i.e.
     # return non-zero exit code)
     MockBcl2fastq2Exe.create(os.path.join(self.bin, "bcl2fastq"),
                              exit_code=1)
     os.environ['PATH'] = "%s:%s" % (self.bin, os.environ['PATH'])
     # Do the test
     ap = AutoProcess()
     ap.setup(os.path.join(self.wd, "171020_M00879_00002_AHGXXXX"))
     self.assertTrue(ap.params.sample_sheet is not None)
     self.assertRaises(Exception, ap.make_fastqs, protocol="standard")
     # Check outputs
     analysis_dir = os.path.join(self.wd,
                                 "171020_M00879_00002_AHGXXXX_analysis")
     # Directories that should have been created despite the failure
     for subdir in (os.path.join("primary_data",
                                 "171020_M00879_00002_AHGXXXX"),
                    os.path.join("logs", "002_make_fastqs"), "bcl2fastq"):
         self.assertTrue(os.path.isdir(os.path.join(analysis_dir, subdir)),
                         "Missing subdir: %s" % subdir)
     # Files that must NOT exist: this assertion fails when a file is
     # present, so the message reports it as unexpected, not missing
     for filen in ("statistics.info", "statistics_full.info",
                   "per_lane_statistics.info", "per_lane_sample_stats.info",
                   "projects.info", "processing_qc.html"):
         self.assertFalse(os.path.exists(os.path.join(analysis_dir, filen)),
                          "Unexpected file found: %s" % filen)
Example #6
0
    def test_ignore_commented_projects(self):
        """AutoProcess.get_analysis_projects: ignore commented projects
        """
        # Make an auto-process directory
        mockdir = MockAnalysisDirFactory.bcl2fastq2(
            '160621_K00879_0087_000000000-AGEW9',
            'hiseq',
            metadata={
                "run_number": 87,
                "source": "local"
            },
            top_dir=self.dirn)
        mockdir.create()
        # Update the projects.info file
        projects_info = os.path.join(mockdir.dirn, "projects.info")
        with open(projects_info, "w") as fp:
            fp.write(
                """#Project\tSamples\tUser\tLibrary\tSC_Platform\tOrganism\tPI\tComments
#AB\tAB1,AB2\tAlan Brown\tRNA-seq\t.\tHuman\tAudrey Benson\t1% PhiX
CDE\tCDE3,CDE4\tClive David Edwards\tChIP-seq\t.\tMouse\tClaudia Divine Eccleston\t1% PhiX
""")
        # List the projects
        projects = AutoProcess(mockdir.dirn).get_analysis_projects()
        expected = ('CDE', 'undetermined')
        self.assertEqual(len(projects), len(expected))
        for p in projects:
            self.assertTrue(isinstance(p, AnalysisProject))
            self.assertTrue(p.name in expected)
        for p in expected:
            matched_projects = [x for x in projects if x.name == p]
            self.assertEqual(len(matched_projects), 1)
Example #7
0
    def test_update_project_metadata_file_uncomment_existing_project(self):
        """
        AutoProcess.update_project_metadata_file: existing project is uncommented
        """
        # Make an auto-process directory
        run_metadata = {"run_number": 87, "source": "local"}
        mockdir = MockAnalysisDirFactory.bcl2fastq2(
            '160621_K00879_0087_000000000-AGEW9', 'hiseq',
            metadata=run_metadata, top_dir=self.dirn)
        mockdir.create(no_project_dirs=True)
        projects_info = os.path.join(mockdir.dirn, "projects.info")
        header = "#Project\tSamples\tUser\tLibrary\tSC_Platform\tOrganism\tPI\tComments\n"
        # Write a projects.info file listing one commented-out project
        # (no trailing newline)
        with open(projects_info, 'wt') as fp:
            fp.write(header + "#CDE\tCDE3,CDE4\t.\t.\t.\t.\t.\tKeep me")
        # Update the projects.info file
        AutoProcess(mockdir.dirn).update_project_metadata_file()
        # Check output - 'AB' has been added, and 'CDE' is no longer
        # commented out but keeps its original comment field
        with open(projects_info, 'rt') as fp:
            contents = fp.read()
        self.assertEqual(
            contents,
            header +
            "AB\tAB1,AB2\t.\t.\t.\t.\t.\t.\n"
            "CDE\tCDE3,CDE4\t.\t.\t.\t.\t.\tKeep me\n")
Example #8
0
    def test_update_project_metadata_file_missing_from_bcl2fastq_output(self):
        """
        AutoProcess.update_project_metadata_file: make missing file and populate from bcl2fastq output
        """
        # Make an auto-process directory
        run_metadata = {"run_number": 87, "source": "local"}
        mockdir = MockAnalysisDirFactory.bcl2fastq2(
            '160621_K00879_0087_000000000-AGEW9', 'hiseq',
            metadata=run_metadata, top_dir=self.dirn)
        mockdir.create(no_project_dirs=True)
        # Remove projects.info file
        projects_info = os.path.join(mockdir.dirn, "projects.info")
        os.remove(projects_info)
        # Update the projects.info file
        AutoProcess(mockdir.dirn).update_project_metadata_file()
        # Check output - header line plus one line per project, each
        # terminated with a newline
        expected_lines = (
            "#Project\tSamples\tUser\tLibrary\tSC_Platform\tOrganism\tPI\tComments",
            "AB\tAB1,AB2\t.\t.\t.\t.\t.\t.",
            "CDE\tCDE3,CDE4\t.\t.\t.\t.\t.\t.",
        )
        with open(projects_info, 'rt') as fp:
            contents = fp.read()
        self.assertEqual(
            contents,
            "".join(line + "\n" for line in expected_lines))
Example #9
0
    def test_make_project_metadata_file_no_bcl2fastq_output(self):
        """
        AutoProcess.make_project_metadata_file: new 'projects.info' (no bcl2fastq output)
        """
        # Make an auto-process directory
        run_metadata = {"run_number": 87, "source": "local"}
        mockdir = MockAnalysisDirFactory.bcl2fastq2(
            '160621_K00879_0087_000000000-AGEW9', 'hiseq',
            metadata=run_metadata, top_dir=self.dirn)
        mockdir.create(no_project_dirs=True)
        # Remove the projects.info file and the bcl2fastq output dir
        projects_info = os.path.join(mockdir.dirn, "projects.info")
        os.remove(projects_info)
        shutil.rmtree(os.path.join(mockdir.dirn, "bcl2fastq"))
        # Create a new projects.info file
        AutoProcess(mockdir.dirn).make_project_metadata_file()
        # Check outputs - the file should exist and hold only the
        # header line
        self.assertTrue(os.path.exists(projects_info))
        with open(projects_info, 'rt') as fp:
            contents = fp.read()
        self.assertEqual(
            contents,
            "#Project\tSamples\tUser\tLibrary\tSC_Platform\tOrganism\tPI\tComments\n")
 def test_update_fastq_stats(self):
     """update_fastq_stats: generates statistics files

     The four statistics files must be absent from the mock analysis
     directory beforehand and present after 'update_fastq_stats'.
     """
     # Make an auto-process directory
     mockdir = MockAnalysisDirFactory.bcl2fastq2(
         '190104_M00879_0087_000000000-AGEW9', 'miseq',
         metadata={ "instrument_datestamp": "190104" },
         top_dir=self.wd)
     mockdir.create(no_project_dirs=True)
     # Names of the statistics files
     stats_files = (
         "statistics.info",
         "statistics_full.info",
         "per_lane_statistics.info",
         "per_lane_sample_stats.info",
     )
     # None of the stats files should exist yet
     for name in stats_files:
         stats_path = os.path.join(mockdir.dirn, name)
         self.assertFalse(os.path.exists(stats_path),
                          "%s: file exists, but shouldn't" % name)
     # Update (i.e. generate) stats
     update_fastq_stats(AutoProcess(mockdir.dirn))
     # All of the stats files should exist now
     for name in stats_files:
         stats_path = os.path.join(mockdir.dirn, name)
         self.assertTrue(os.path.exists(stats_path),
                         "%s: missing" % name)
    def test_analyse_barcodes_with_stored_bases_mask(self):
        """analyse_barcodes: test with stored bases mask
        """
        # Make an auto-process directory
        mockdir = MockAnalysisDirFactory.bcl2fastq2(
            '160621_M00879_0087_000000000-AGEW9',
            'miseq',
            bases_mask='y76,I6,y76',
            metadata={ "instrument_datestamp": "160621" },
            top_dir=self.wd)
        mockdir.create(no_project_dirs=True)
        # Add data to Fastq files
        self._insert_fastq_reads(mockdir.dirn)
        # Populate the samplesheet
        sample_sheet = os.path.join(mockdir.dirn,"custom_SampleSheet.csv")
        with open(sample_sheet,'w') as fp:
            fp.write("""[Data]
Sample_ID,Sample_Name,Sample_Plate,Sample_Well,I7_Index_ID,index,I5_Index_ID,index2,Sample_Project,Description
AB1,AB1,,,D701,CGTGTAGG,D501,GACCTGAA,AB,
AB2,AB2,,,D702,CGTGTAGG,D501,ATGTAACT,AB,
CDE3,CDE3,,,D701,GACCTGAA,D501,CGTGTAGG,CDE,
CDE4,CDE4,,,D702,ATGTAACT,D501,CGTGTAGG,CDE,
""")
        # Analyse barcodes
        ap = AutoProcess(mockdir.dirn,
                         settings=self.settings)
        analyse_barcodes(ap)
        # Check outputs
        analysis_dir = os.path.join(
            self.wd,
            "160621_M00879_0087_000000000-AGEW9_analysis")
        self.assertTrue(os.path.isdir(
                os.path.join(analysis_dir,"barcode_analysis")),
                            "Missing dir: barcode_analysis")
        self.assertTrue(os.path.isdir(
                os.path.join(analysis_dir,"barcode_analysis","counts")),
                            "Missing dir: barcode_analysis/counts")
        for f in ("AB.AB1_S1_R1_001.fastq.gz.counts",
                  "AB.AB2_S2_R1_001.fastq.gz.counts",
                  "CDE.CDE3_S3_R1_001.fastq.gz.counts",
                  "CDE.CDE4_S4_R1_001.fastq.gz.counts",
                  "undetermined.Undetermined_S0_R1_001.fastq.gz.counts"):
            self.assertTrue(os.path.isfile(
                os.path.join(analysis_dir,"barcode_analysis","counts",f)),
                            "Missing file: %s" % f)
        self.assertTrue(os.path.isfile(
                os.path.join(analysis_dir,
                             "barcode_analysis",
                             "barcodes.report")),
                        "Missing file: barcodes.report")
        self.assertTrue(os.path.isfile(
                os.path.join(analysis_dir,
                             "barcode_analysis",
                             "barcodes.xls")),
                        "Missing file: barcodes.xls")
        self.assertTrue(os.path.isfile(
                os.path.join(analysis_dir,
                             "barcode_analysis",
                             "barcodes.html")),
                        "Missing file: barcodes.html")
 def test_publish_qc_with_projects_with_multiple_fastq_sets(self):
     """publish_qc: projects with multiple Fastq sets

     The first project is given a second Fastq set ('fastqs.extra')
     with QC in 'qc.extra'; after publishing, the outputs for the
     standard QC of every project and for the extra QC must exist.
     """
     # Make an auto-process directory
     run_metadata = { "run_number": 87,
                      "source": "local",
                      "instrument_datestamp": "160621" }
     mockdir = MockAnalysisDirFactory.bcl2fastq2(
         '160621_K00879_0087_000000000-AGEW9', 'hiseq',
         metadata=run_metadata,
         top_dir=self.dirn)
     mockdir.create()
     ap = AutoProcess(mockdir.dirn)
     # Add processing report and QC outputs
     UpdateAnalysisDir(ap).add_processing_report()
     for project in ap.get_analysis_projects():
         UpdateAnalysisProject(project).add_qc_outputs()
     # Add additional fastq set (plus its QC) for first project
     multi_fastqs_project = ap.get_analysis_projects()[0]
     UpdateAnalysisProject(multi_fastqs_project).add_fastq_set(
         "fastqs.extra", ("Alt1.r1.fastq.gz","Alt2.r1.fastq.gz"))
     UpdateAnalysisProject(multi_fastqs_project).add_qc_outputs(
         fastq_set="fastqs.extra", qc_dir="qc.extra")
     # Make a mock publication area
     publication_dir = os.path.join(self.dirn,'QC')
     os.mkdir(publication_dir)
     # Publish
     publish_qc(ap,location=publication_dir)
     # Build the list of expected outputs
     analysis_name = os.path.basename(ap.analysis_dir)
     outputs = ["index.html",
                "processing_qc.html"]
     for project in ap.get_analysis_projects():
         # Standard QC outputs followed by the MultiQC output
         report_dir = "qc_report.%s.%s" % (project.name, analysis_name)
         outputs.extend((
             report_dir,
             "%s.zip" % report_dir,
             os.path.join(report_dir,"qc_report.html"),
             os.path.join(report_dir,"qc"),
             "multiqc_report.%s.html" % project.name,
         ))
     # Additional QC for second fastq set in first project
     extra_report_dir = "qc.extra_report.%s.%s" % (
         multi_fastqs_project.name, analysis_name)
     outputs.extend((
         extra_report_dir,
         "%s.zip" % extra_report_dir,
         os.path.join(extra_report_dir,"qc.extra_report.html"),
         os.path.join(extra_report_dir,"qc.extra"),
         "multiqc.extra_report.%s.html" % multi_fastqs_project.name,
     ))
     # Check each expected output exists in the published directory
     published_dir = os.path.join(
         publication_dir,
         "160621_K00879_0087_000000000-AGEW9_analysis")
     for item in outputs:
         f = os.path.join(published_dir, item)
         self.assertTrue(os.path.exists(f),"Missing %s" % f)
 def test_report_concise(self):
     """report: report run in 'concise' mode

     The project metadata supplied to the mock analysis directory
     (user, library type, organism and PI) is expected to appear in
     the single-line concise report.
     """
     # Make a mock auto-process directory
     # The 'User' values must match the names in the expected report
     # below (they had been replaced by a '******' placeholder)
     mockdir = MockAnalysisDirFactory.bcl2fastq2(
         '170901_M00879_0087_000000000-AGEW9',
         'miseq',
         metadata={
             "source": "testing",
             "run_number": 87,
             "assay": "Nextera"
         },
         project_metadata={
             "AB": {
                 "User": "Alison Bell",
                 "Library type": "RNA-seq",
                 "Organism": "Human",
                 "PI": "Audrey Bower"
             },
             "CDE": {
                 "User": "Charles David Edwards",
                 "Library type": "ChIP-seq",
                 "Organism": "Mouse",
                 "PI": "Colin Delaney Eccleston"
             }
         },
         top_dir=self.dirn)
     mockdir.create()
     # Make autoprocess instance
     ap = AutoProcess(analysis_dir=mockdir.dirn)
     # Generate concise report
     self.assertEqual(
         report_concise(ap),
         "Paired end: 'AB': Alison Bell, Human RNA-seq (PI: Audrey Bower) (2 samples); 'CDE': Charles David Edwards, Mouse ChIP-seq (PI: Colin Delaney Eccleston) (2 samples)"
     )
    def test_report_info_no_projects(self):
        """report: report run with no projects in 'info' mode
        """
        # Make a mock auto-process directory
        mockdir = MockAnalysisDirFactory.bcl2fastq2(
            '170901_M00879_0087_000000000-AGEW9',
            'miseq',
            metadata={
                "source": "testing",
                "run_number": 87,
                "assay": "Nextera"
            },
            top_dir=self.dirn)
        mockdir.create(no_project_dirs=True)
        # Make autoprocess instance
        ap = AutoProcess(analysis_dir=mockdir.dirn)
        # Generate concise report
        expected = """Run reference: MISEQ_170901#87
Directory    : %s
Platform     : miseq
Unaligned dir: bcl2fastq

Summary of data in 'bcl2fastq' dir:

- AB: AB1-2 (2 paired end samples)
- CDE: CDE3-4 (2 paired end samples)

No analysis projects found""" % mockdir.dirn
        for o, e in zip(report_info(ap).split('\n'), expected.split('\n')):
            self.assertEqual(o, e)
 def test_make_fastqs_icell8_protocol(self):
     """make_fastqs: icell8 protocol

     Runs 'make_fastqs' with the 'icell8' protocol against a mock
     bcl2fastq that is given the bases mask to assert, then checks
     the expected subdirectories and files exist.
     """
     run_name = "171020_SN7001250_00002_AHGXXXX"
     # Create mock source data
     MockIlluminaRun(run_name, "hiseq", top_dir=self.wd).create()
     # Create mock bcl2fastq
     # Check that bases mask is as expected
     MockBcl2fastq2Exe.create(os.path.join(self.bin, "bcl2fastq"),
                              assert_bases_mask="y25n76,I8,I8,y101")
     os.environ['PATH'] = "%s:%s" % (self.bin, os.environ['PATH'])
     # Do the test
     ap = AutoProcess()
     ap.setup(os.path.join(self.wd, run_name))
     self.assertTrue(ap.params.sample_sheet is not None)
     ap.make_fastqs(protocol="icell8")
     # Check outputs
     analysis_dir = os.path.join(self.wd, run_name + "_analysis")
     expected_subdirs = (
         os.path.join("primary_data", run_name),
         os.path.join("logs", "002_make_fastqs_icell8"),
         "bcl2fastq",
     )
     for subdir in expected_subdirs:
         self.assertTrue(os.path.isdir(os.path.join(analysis_dir, subdir)),
                         "Missing subdir: %s" % subdir)
     expected_files = (
         "statistics.info",
         "statistics_full.info",
         "per_lane_statistics.info",
         "per_lane_sample_stats.info",
         "projects.info",
         "processing_qc.html",
     )
     for filen in expected_files:
         self.assertTrue(os.path.isfile(os.path.join(analysis_dir, filen)),
                         "Missing file: %s" % filen)
 def test_make_fastqs_specify_platform_via_metadata(self):
     """make_fastqs: implicitly specify the platform via metadata

     After setup the metadata holds no platform; it is then set to
     'miseq' by hand before running 'make_fastqs', and the expected
     subdirectories and files are checked.
     """
     run_name = "171020_UNKNOWN_00002_AHGXXXX"
     # Create mock source data
     MockIlluminaRun(run_name, "miseq", top_dir=self.wd).create()
     # Create mock bcl2fastq
     MockBcl2fastq2Exe.create(os.path.join(self.bin, "bcl2fastq"),
                              platform="miseq")
     os.environ['PATH'] = "%s:%s" % (self.bin, os.environ['PATH'])
     # Do the test
     ap = AutoProcess()
     ap.setup(os.path.join(self.wd, run_name))
     self.assertTrue(ap.params.sample_sheet is not None)
     self.assertTrue(ap.metadata.platform is None)
     # Supply the platform through the metadata
     ap.metadata["platform"] = "miseq"
     ap.make_fastqs(protocol="standard")
     # Check outputs
     analysis_dir = os.path.join(self.wd, run_name + "_analysis")
     expected_subdirs = (
         os.path.join("primary_data", run_name),
         os.path.join("logs", "002_make_fastqs"),
         "bcl2fastq",
     )
     for subdir in expected_subdirs:
         self.assertTrue(os.path.isdir(os.path.join(analysis_dir, subdir)),
                         "Missing subdir: %s" % subdir)
     expected_files = (
         "statistics.info",
         "statistics_full.info",
         "per_lane_statistics.info",
         "per_lane_sample_stats.info",
         "projects.info",
         "processing_qc.html",
     )
     for filen in expected_files:
         self.assertTrue(os.path.isfile(os.path.join(analysis_dir, filen)),
                         "Missing file: %s" % filen)
Example #17
0
 def test_publish_qc_missing_destination(self):
     """publish_qc: raise exception if destination doesn't exist

     Publishing to a non-existent location must raise, and must not
     create the location as a side effect.
     """
     # Make an auto-process directory
     run_metadata = {
         "run_number": 87,
         "source": "local",
         "instrument_datestamp": "160621"
     }
     mockdir = MockAnalysisDirFactory.bcl2fastq2(
         '160621_K00879_0087_000000000-AGEW9', 'hiseq',
         metadata=run_metadata,
         top_dir=self.dirn)
     mockdir.create()
     ap = AutoProcess(mockdir.dirn, settings=self.settings)
     # Add processing report and QC outputs
     UpdateAnalysisDir(ap).add_processing_report()
     for project in ap.get_analysis_projects():
         UpdateAnalysisProject(project).add_qc_outputs()
     # Reference publication area which doesn't exist
     publication_dir = os.path.join(self.dirn, 'QC')
     self.assertFalse(os.path.exists(publication_dir))
     # Publishing should fail and leave the area uncreated
     with self.assertRaises(Exception):
         publish_qc(ap, location=publication_dir)
     self.assertFalse(os.path.exists(publication_dir))
Example #18
0
 def test_publish_qc_processing_qc(self):
     """publish_qc: processing QC report only

     With no project directories and only a processing report, the
     published directory should hold the index and the processing
     QC report.
     """
     # Make an auto-process directory
     run_metadata = {
         "run_number": 87,
         "source": "local",
         "instrument_datestamp": "160621"
     }
     mockdir = MockAnalysisDirFactory.bcl2fastq2(
         '160621_K00879_0087_000000000-AGEW9', 'hiseq',
         metadata=run_metadata,
         top_dir=self.dirn)
     mockdir.create(no_project_dirs=True)
     ap = AutoProcess(mockdir.dirn, settings=self.settings)
     # Add processing report
     UpdateAnalysisDir(ap).add_processing_report()
     # Make a mock publication area
     publication_dir = os.path.join(self.dirn, 'QC')
     os.mkdir(publication_dir)
     # Publish QC
     publish_qc(ap, location=publication_dir)
     # Check outputs
     published_dir = os.path.join(
         publication_dir,
         "160621_K00879_0087_000000000-AGEW9_analysis")
     for item in ("index.html", "processing_qc.html"):
         f = os.path.join(published_dir, item)
         self.assertTrue(os.path.exists(f), "Missing %s" % f)
Example #19
0
 def test_publish_qc_with_project_missing_qc(self):
     """publish_qc: raises exception if project has missing QC

     QC outputs are added to every project except the first one
     returned by 'get_analysis_projects', so publishing must raise.
     """
     # Make an auto-process directory
     run_metadata = {
         "run_number": 87,
         "source": "local",
         "instrument_datestamp": "160621"
     }
     mockdir = MockAnalysisDirFactory.bcl2fastq2(
         '160621_K00879_0087_000000000-AGEW9', 'hiseq',
         metadata=run_metadata,
         top_dir=self.dirn)
     mockdir.create()
     ap = AutoProcess(mockdir.dirn, settings=self.settings)
     # Add processing report
     UpdateAnalysisDir(ap).add_processing_report()
     # Add QC outputs for subset of projects (skip the first)
     for project in ap.get_analysis_projects()[1:]:
         UpdateAnalysisProject(project).add_qc_outputs()
     # Make a mock publication area
     publication_dir = os.path.join(self.dirn, 'QC')
     os.mkdir(publication_dir)
     # Publish
     with self.assertRaises(Exception):
         publish_qc(ap, location=publication_dir)
Example #20
0
 def test_publish_qc_with_cellranger_qc_multiple_lanes_subsets(self):
     """publish_qc: publish cellranger QC output (multiple subsets of lanes)

     Cellranger QC output is added for lanes "45" and "78"; both
     summaries should be published next to the index and the
     processing QC report.
     """
     # Make an auto-process directory
     run_metadata = {
         "run_number": 87,
         "source": "local",
         "instrument_datestamp": "160621"
     }
     mockdir = MockAnalysisDirFactory.bcl2fastq2(
         '160621_K00879_0087_000000000-AGEW9', 'hiseq',
         metadata=run_metadata,
         top_dir=self.dirn)
     mockdir.create(no_project_dirs=True)
     ap = AutoProcess(mockdir.dirn, settings=self.settings)
     # Add processing and cellranger QC reports
     UpdateAnalysisDir(ap).add_processing_report()
     UpdateAnalysisDir(ap).add_cellranger_qc_output(lanes="45")
     UpdateAnalysisDir(ap).add_cellranger_qc_output(lanes="78")
     # Make a mock publication area
     publication_dir = os.path.join(self.dirn, 'QC')
     os.mkdir(publication_dir)
     # Publish
     publish_qc(ap, location=publication_dir)
     # Check outputs
     published_dir = os.path.join(
         publication_dir,
         "160621_K00879_0087_000000000-AGEW9_analysis")
     expected_outputs = (
         "index.html",
         "processing_qc.html",
         "cellranger_qc_summary_45.html",
         "cellranger_qc_summary_78.html",
     )
     for item in expected_outputs:
         f = os.path.join(published_dir, item)
         self.assertTrue(os.path.exists(f), "Missing %s" % f)
Example #21
0
 def test_bcl2fastq2_can_be_loaded_after_rsync(self):
     """
     merge_fastq_dirs: rsynced bcl2fastq v2 output can be loaded
     """
     analysis_dir = self._setup_bcl2fastq2()
     # Merge the unaligned dirs
     self.ap = AutoProcess(analysis_dir, settings=self.settings)
     merge_fastq_dirs(self.ap, "bcl2fastq.lanes1-2", output_dir="bcl2fastq")
     # Merged output directory should exist and be loadable
     self._assert_dir_exists(os.path.join(analysis_dir, 'bcl2fastq'))
     try:
         IlluminaData(analysis_dir, unaligned_dir='bcl2fastq')
     except Exception as ex:
         self.fail("exception loading merged directory: %s" % ex)
     # Rsync (with empty directories pruned)
     target_dir = os.path.join(self.dirn, "rsynced")
     os.mkdir(target_dir)
     rsync_cmd = applications.general.rsync(
         "%s/bcl2fastq" % self.ap.analysis_dir,
         target_dir,
         prune_empty_dirs=True)
     rsync_cmd.run_subprocess(log=os.path.join(self.dirn, "rsync.log"))
     # Rsynced copy should exist and be loadable too
     self._assert_dir_exists(os.path.join(target_dir, 'bcl2fastq'))
     try:
         IlluminaData(target_dir, unaligned_dir='bcl2fastq')
     except Exception as ex:
         self.fail("exception loading rsynced directory: %s" % ex)
Example #22
0
 def test_with_project_dirs_no_projects_dot_info_no_unaligned(self):
     """AutoProcess.get_analysis_projects: project dirs exist (no projects.info, no unaligned)

     With 'projects.info' removed, the projects 'AB', 'CDE' and
     'undetermined' should still each be returned exactly once.
     """
     # Make an auto-process directory
     mockdir = MockAnalysisDirFactory.bcl2fastq2(
         '160621_K00879_0087_000000000-AGEW9',
         'hiseq',
         metadata={
             "run_number": 87,
             "source": "local"
         },
         top_dir=self.dirn)
     mockdir.create()
     # Remove the projects.info file
     os.remove(os.path.join(mockdir.dirn, "projects.info"))
     # List the projects
     projects = AutoProcess(mockdir.dirn).get_analysis_projects()
     expected = ('AB', 'CDE', 'undetermined')
     self.assertEqual(len(projects), len(expected))
     # Dedicated assertions report the offending object/name on failure
     for p in projects:
         self.assertIsInstance(p, AnalysisProject)
         self.assertIn(p.name, expected)
     # Each expected project should appear exactly once
     for p in expected:
         matched_projects = [x for x in projects if x.name == p]
         self.assertEqual(len(matched_projects), 1)
    def test_setup_analysis_dirs_icell8_atac(self):
        """
        setup_analysis_dirs: test create new analysis dir for ICELL8 ATAC

        Creates a mock paired-end analysis directory without project
        directories, rewrites 'projects.info' so that project 'AB' has
        the 'ICELL8 ATAC' single cell platform, and adds an empty
        'icell8_atac_stats.xlsx' under 'bcl2fastq/Reports'. After
        calling setup_analysis_dirs() it checks that each project
        directory exists with a README.info and the expected Fastqs,
        and that 'icell8_atac_stats.xlsx' is present in the 'AB'
        project directory.
        """
        # Build the mock analysis directory (no project dirs yet)
        mockdir = MockAnalysisDirFactory.bcl2fastq2(
            '170901_M00879_0087_000000000-AGEW9',
            'miseq',
            metadata={"instrument_datestamp": "170901"},
            paired_end=True,
            top_dir=self.dirn)
        mockdir.create(no_project_dirs=True)
        top = mockdir.dirn
        # Overwrite 'projects.info' with the ICELL8 ATAC metadata
        info_content = (
            "#Project\tSamples\tUser\tLibrary\tSC_Platform\tOrganism\tPI\tComments\n"
            "AB\tAB1,AB2\tAlan Brown\tscATAC-seq\tICELL8 ATAC\tHuman\tAudrey Benson\t1% PhiX\n"
        )
        with open(os.path.join(top, "projects.info"), "w") as out:
            out.write(info_content)
        # Create an empty ICELL8 ATAC stats spreadsheet
        stats_xlsx = os.path.join(top, "bcl2fastq", "Reports",
                                  "icell8_atac_stats.xlsx")
        with open(stats_xlsx, 'w') as out:
            out.write("")
        # Fastqs expected in each project directory
        expected_fastqs = {
            "AB": [
                "AB1_S1_R1_001.fastq.gz", "AB1_S1_R2_001.fastq.gz",
                "AB2_S2_R1_001.fastq.gz", "AB2_S2_R2_001.fastq.gz"
            ],
            "undetermined": [
                "Undetermined_S0_R1_001.fastq.gz",
                "Undetermined_S0_R2_001.fastq.gz"
            ]
        }
        # None of the project dirs should exist beforehand
        for name in expected_fastqs:
            self.assertFalse(os.path.exists(os.path.join(top, name)))
        # Run the command under test
        autoprocess = AutoProcess(analysis_dir=top)
        setup_analysis_dirs(autoprocess)
        # Inspect each project directory in turn
        for name, fastqs in expected_fastqs.items():
            project_dir = os.path.join(top, name)
            self.assertTrue(os.path.exists(project_dir))
            # README.info must be present
            self.assertTrue(
                os.path.exists(os.path.join(project_dir, "README.info")))
            # Fastqs directory and its contents must be present
            fastqs_dir = os.path.join(project_dir, "fastqs")
            self.assertTrue(os.path.exists(fastqs_dir))
            for fastq_name in fastqs:
                self.assertTrue(
                    os.path.exists(os.path.join(fastqs_dir, fastq_name)))
        # The ICELL8 ATAC spreadsheet must be in the 'AB' project
        self.assertTrue(
            os.path.exists(
                os.path.join(top, "AB", "icell8_atac_stats.xlsx")))
    def test_report_summary_single_cell(self):
        """report: report single-cell run in 'summary' mode
        """
        # Make a mock auto-process directory
        mockdir = MockAnalysisDirFactory.bcl2fastq2(
            '170901_M00879_0087_000000000-AGEW9',
            'miseq',
            metadata={
                "source": "testing",
                "run_number": 87,
                "bcl2fastq_software":
                "('/usr/bin/bcl2fastq', 'bcl2fastq', '2.17.1.14')",
                "cellranger_software":
                "('/usr/bin/cellranger', 'cellranger', '3.0.1')",
                "assay": "Nextera"
            },
            project_metadata={
                "AB": {
                    "User": "******",
                    "Library type": "scRNA-seq",
                    "Organism": "Human",
                    "PI": "Audrey Bower",
                    "Single cell platform": "ICELL8",
                    "Number of cells": 1311
                },
                "CDE": {
                    "User": "******",
                    "Library type": "ChIP-seq",
                    "Organism": "Mouse",
                    "PI": "Colin Delaney Eccleston",
                    "Comments": "Repeat of previous run"
                }
            },
            top_dir=self.dirn)
        mockdir.create()
        # Make autoprocess instance
        ap = AutoProcess(analysis_dir=mockdir.dirn)
        # Generate summary report
        expected = """MISEQ run #87 datestamped 170901
================================
Run name  : 170901_M00879_0087_000000000-AGEW9
Reference : MISEQ_170901#87
Platform  : MISEQ
Directory : %s
Endedness : Paired end
Bcl2fastq : bcl2fastq 2.17.1.14
Cellranger: cellranger 3.0.1
Assay     : Nextera

2 projects:
- 'AB':  Alison Bell           Human scRNA-seq (ICELL8) 2 samples/1311 cells (PI Audrey Bower)           
- 'CDE': Charles David Edwards Mouse ChIP-seq           2 samples            (PI Colin Delaney Eccleston)

Additional notes/comments:
- CDE: Repeat of previous run
""" % mockdir.dirn
        for o, e in zip(report_summary(ap).split('\n'), expected.split('\n')):
            self.assertEqual(o, e)
Example #25
0
    def test_bcl2fastq2_one_undetermined_fastq_pair(self):
        """
        merge_fastq_dirs: bcl2fastq v2 output with --no-lane-splitting, one undetermined Fastq pair
        """
        analysis_dir = self._setup_bcl2fastq2_no_lane_splitting()
        # Remove undetermined Fastqs from bcl2fastq.CDE
        for f in os.listdir(os.path.join(analysis_dir, "bcl2fastq.CDE")):
            if f.startswith("Undetermined_S0_"):
                os.remove(os.path.join(analysis_dir, "bcl2fastq.CDE", f))
        # Merge the unaligned dirs
        self.ap = AutoProcess(analysis_dir, settings=self.settings)
        merge_fastq_dirs(self.ap, "bcl2fastq.AB")
        # Check outputs
        self._assert_dir_exists(os.path.join(analysis_dir,
                                             'save.bcl2fastq.AB'))
        self._assert_dir_exists(
            os.path.join(analysis_dir, 'save.bcl2fastq.CDE'))
        self._assert_dir_exists(os.path.join(analysis_dir, 'bcl2fastq.AB'))
        self._assert_dir_doesnt_exist(
            os.path.join(analysis_dir, 'bcl2fastq.CDE'))
        for f in (
                'AB/AB1_S1_R1_001.fastq.gz',
                'AB/AB1_S1_R2_001.fastq.gz',
                'AB/AB2_S2_R1_001.fastq.gz',
                'AB/AB2_S2_R2_001.fastq.gz',
                'CDE/CDE3_S3_R1_001.fastq.gz',
                'CDE/CDE3_S3_R2_001.fastq.gz',
                'CDE/CDE4_S4_R1_001.fastq.gz',
                'CDE/CDE4_S4_R2_001.fastq.gz',
                'Undetermined_S0_R1_001.fastq.gz',
                'Undetermined_S0_R2_001.fastq.gz',
        ):
            self._assert_file_exists(
                os.path.join(analysis_dir, 'bcl2fastq.AB', f))
        # Check merge of undetermined fastqs
        undetermined_r1 = gzip.GzipFile(
            os.path.join(analysis_dir, 'bcl2fastq.AB',
                         'Undetermined_S0_R1_001.fastq.gz'),
            'rb').read().decode()
        expected_r1 = '\n'.join(fastq_reads_r1[:4]) + '\n'
        self.assertEqual(undetermined_r1, expected_r1)
        undetermined_r2 = gzip.GzipFile(
            os.path.join(analysis_dir, 'bcl2fastq.AB',
                         'Undetermined_S0_R2_001.fastq.gz'),
            'rb').read().decode()
        expected_r2 = '\n'.join(fastq_reads_r2[:4]) + '\n'
        self.assertEqual(undetermined_r2, expected_r2)
        # Check projects.info files
        self._assert_file_exists(
            os.path.join(analysis_dir, 'save.projects.info'))
        self._assert_file_exists(os.path.join(analysis_dir, 'projects.info'))
        with open(os.path.join(analysis_dir, 'projects.info'), 'rt') as fp:
            projects_info = fp.read()
        expected = """#Project	Samples	User	Library	SC_Platform	Organism	PI	Comments
AB	AB1,AB2	.	.	.	.	.	.
CDE	CDE3,CDE4	.	.	.	.	.	.
"""
        self.assertEqual(projects_info, expected)
    def test_report_processing_qc_empty_lane(self):
        """report_processing_qc: report with empty lane
        """
        # Create test data
        analysis_dir = os.path.join(self.wd,
                                    "180430_K00311_0001_ABCDEFGHXX_analysis")
        os.mkdir(analysis_dir)
        per_lane_sample_stats = os.path.join(analysis_dir,
                                             "per_lane_sample_stats.info")
        with open(per_lane_sample_stats, 'w') as fp:
            fp.write("""
Lane 1
Total reads = 0

Lane 2
Total reads = 0114447328
- CDE/CDE1	25058003	21.9%
- CDE/CDE2	0	0.0%
- CDE/CDE3	34509382	30.2%
- CDE/CDE4	27283286	23.8%
- Undetermined_indices/undetermined	27596657	24.1%
""")
        per_lane_statistics = os.path.join(analysis_dir,
                                           "per_lane_statistics.info")
        with open(per_lane_statistics, 'w') as fp:
            fp.write(
                """#Lane	Total reads	Assigned reads	Unassigned reads	%assigned	%unassigned
Lane 1	0	0	0	0.0	0.0
Lane 2	114447328	86850671	27596657	75.9	24.1
""")
        statistics_full = os.path.join(analysis_dir, "statistics_full.info")
        with open(statistics_full, 'w') as fp:
            fp.write(
                """#Project	Sample	Fastq	Size	Nreads	Paired_end	Read_number	L1	L2
AB	AB1	AB1_S1_R1_001.fastq.gz	0.0K	0	Y	1		
AB	AB1	AB1_S1_R2_001.fastq.gz	0.0K	0	Y	2		
AB	AB2	AB2_S2_R1_001.fastq.gz	0.0K	0	Y	1		
AB	AB2	AB2_S2_R2_001.fastq.gz	0.0K	0	Y	2		
AB	AB3	AB3_S3_R1_001.fastq.gz	0.0K	0	Y	1		
AB	AB3	AB3_S3_R2_001.fastq.gz	0.0k	0	Y	2		
AB	AB4	AB4_S4_R1_001.fastq.gz	1.1G	0	Y	1		
AB	AB4	AB4_S4_R2_001.fastq.gz	1.2G	0	Y	2		
CDE	CDE1	CDE1_S5_R1_001.fastq.gz	1.0G	0	Y	1      		25058003
CDE	CDE1	CDE1_S5_R2_001.fastq.gz	1.1G	0	Y	2		25058003
CDE	CDE2	CDE2_S6_R1_001.fastq.gz	0.0K	0	Y	1		
CDE	CDE2	CDE2_S6_R2_001.fastq.gz	0.0K	0	Y	2		
CDE	CDE3	CDE3_S7_R1_001.fastq.gz	1.4G	34509382	Y	1		34509382
CDE	CDE3	CDE3_S7_R2_001.fastq.gz	1.6G	34509382	Y	2		34509382
CDE	CDE4	CDE4_S8_R1_001.fastq.gz	1.1G	27283286	Y	1		27283286
CDE	CDE4	CDE4_S8_R2_001.fastq.gz	1.2G	27283286	Y	2		27283286
Undetermined_indices	undetermined	Undetermined_S0_R1_001.fastq.gz	1.0K	0	Y	1	0	
Undetermined_indices	undetermined	Undetermined_S0_R2_001.fastq.gz	1.0K	0	Y	2	0	
""")
        # Generate QC report
        output_html = os.path.join(analysis_dir, "processing_report.html")
        self.assertFalse(os.path.exists(output_html))
        report_processing_qc(AutoProcess(analysis_dir), output_html)
        self.assertTrue(os.path.exists(output_html))
Example #27
0
 def test_archive_to_final_via_staging(self):
     """archive: test copying to staging then final archive dir

     Runs archive() twice on a mock analysis directory: first with
     final=False, expecting only the '__..._analysis.pending' staging
     directory to exist, then with final=True, expecting the staging
     directory to be gone and the final directory to exist with the
     expected subdirectories and files.
     """
     # Build the mock analysis directory
     mockdir = MockAnalysisDirFactory.bcl2fastq2(
         '170901_M00879_0087_000000000-AGEW9',
         'miseq',
         metadata={"instrument_datestamp": "170901"},
         top_dir=self.dirn)
     mockdir.create()
     # Build an empty archive area with year/platform subdirectories
     archive_dir = os.path.join(self.dirn, "archive")
     final_dir = os.path.join(archive_dir, "2017", "miseq")
     os.makedirs(final_dir)
     self.assertTrue(os.path.isdir(final_dir))
     self.assertEqual(len(os.listdir(final_dir)), 0)
     # AutoProcess instance with the metadata set for archiving
     ap = AutoProcess(analysis_dir=mockdir.dirn)
     ap.set_metadata("source", "testing")
     ap.set_metadata("run_number", "87")
     # Options shared by both archive() invocations
     archive_opts = dict(archive_dir=archive_dir,
                         year='2017',
                         platform='miseq',
                         read_only_fastqs=False)
     # Locations of the staging and final copies
     staging_dir = os.path.join(
         final_dir, "__170901_M00879_0087_000000000-AGEW9_analysis.pending")
     final_archive_dir = os.path.join(
         final_dir, "170901_M00879_0087_000000000-AGEW9_analysis")
     # First pass: copy to staging only
     status = archive(ap, final=False, **archive_opts)
     self.assertEqual(status, 0)
     self.assertTrue(os.path.exists(staging_dir))
     self.assertFalse(os.path.exists(final_archive_dir))
     self.assertEqual(len(os.listdir(final_dir)), 1)
     # Second pass: complete the archiving
     status = archive(ap, final=True, **archive_opts)
     self.assertEqual(status, 0)
     self.assertFalse(os.path.exists(staging_dir))
     self.assertTrue(os.path.exists(final_archive_dir))
     self.assertEqual(len(os.listdir(final_dir)), 1)
     # Directories first, then files, that must exist in the archive
     expected_items = (
         "AB", "CDE", "logs", "undetermined",
         "auto_process.info", "custom_SampleSheet.csv",
         "metadata.info", "projects.info", "SampleSheet.orig.csv",
     )
     for item in expected_items:
         self.assertTrue(
             os.path.exists(os.path.join(final_archive_dir, item)))
Example #28
0
    def test_casava_new_output_dir(self):
        """
        merge_fastq_dirs: casava/bcl2fastq v1.8.* output, new output dir
        """
        analysis_dir = self._setup_casava()
        # Merge the unaligned dirs
        self.ap = AutoProcess(analysis_dir)
        merge_fastq_dirs(self.ap, "bcl2fastq.lanes1-2", output_dir="bcl2fastq")
        # Check outputs
        self._assert_dir_exists(
            os.path.join(analysis_dir, 'save.bcl2fastq.lanes1-2'))
        self._assert_dir_exists(
            os.path.join(analysis_dir, 'save.bcl2fastq.lanes3-4'))
        self._assert_dir_exists(os.path.join(analysis_dir, 'bcl2fastq'))
        self._assert_dir_doesnt_exist(
            os.path.join(analysis_dir, 'bcl2fastq.lanes1-2'))
        self._assert_dir_doesnt_exist(
            os.path.join(analysis_dir, 'bcl2fastq.lanes3-4'))
        for f in (
                'Project_AB/Sample_AB1/AB1_GCCAAT_L001_R1_001.fastq.gz',
                'Project_AB/Sample_AB1/AB1_GCCAAT_L001_R2_001.fastq.gz',
                'Project_AB/Sample_AB2/AB2_AGTCAA_L001_R1_001.fastq.gz',
                'Project_AB/Sample_AB2/AB2_AGTCAA_L001_R2_001.fastq.gz',
                'Project_AB/Sample_AB1/AB1_GCCAAT_L002_R1_001.fastq.gz',
                'Project_AB/Sample_AB1/AB1_GCCAAT_L002_R2_001.fastq.gz',
                'Project_AB/Sample_AB2/AB2_AGTCAA_L002_R1_001.fastq.gz',
                'Project_AB/Sample_AB2/AB2_AGTCAA_L002_R2_001.fastq.gz',
                'Project_CDE/Sample_CDE3/CDE3_GCCAAT_L003_R1_001.fastq.gz',
                'Project_CDE/Sample_CDE3/CDE3_GCCAAT_L003_R2_001.fastq.gz',
                'Project_CDE/Sample_CDE4/CDE4_AGTCAA_L003_R1_001.fastq.gz',
                'Project_CDE/Sample_CDE4/CDE4_AGTCAA_L003_R2_001.fastq.gz',
                'Project_CDE/Sample_CDE3/CDE3_GCCAAT_L004_R1_001.fastq.gz',
                'Project_CDE/Sample_CDE3/CDE3_GCCAAT_L004_R2_001.fastq.gz',
                'Project_CDE/Sample_CDE4/CDE4_AGTCAA_L004_R1_001.fastq.gz',
                'Project_CDE/Sample_CDE4/CDE4_AGTCAA_L004_R2_001.fastq.gz',
                'Undetermined_indices/Sample_lane1/lane1_Undetermined_L001_R1_001.fastq.gz',
                'Undetermined_indices/Sample_lane1/lane1_Undetermined_L001_R2_001.fastq.gz',
                'Undetermined_indices/Sample_lane2/lane2_Undetermined_L002_R1_001.fastq.gz',
                'Undetermined_indices/Sample_lane2/lane2_Undetermined_L002_R2_001.fastq.gz',
                'Undetermined_indices/Sample_lane3/lane3_Undetermined_L003_R1_001.fastq.gz',
                'Undetermined_indices/Sample_lane3/lane3_Undetermined_L003_R2_001.fastq.gz',
                'Undetermined_indices/Sample_lane4/lane4_Undetermined_L004_R1_001.fastq.gz',
                'Undetermined_indices/Sample_lane4/lane4_Undetermined_L004_R2_001.fastq.gz'
        ):
            self._assert_file_exists(os.path.join(analysis_dir, 'bcl2fastq',
                                                  f))
        # Check projects.info files
        self._assert_file_exists(
            os.path.join(analysis_dir, 'save.projects.info'))
        self._assert_file_exists(os.path.join(analysis_dir, 'projects.info'))
        with open(os.path.join(analysis_dir, 'projects.info'), 'rt') as fp:
            projects_info = fp.read()
        expected = """#Project	Samples	User	Library	SC_Platform	Organism	PI	Comments
AB	AB1,AB2	.	.	.	.	.	.
CDE	CDE3,CDE4	.	.	.	.	.	.
"""
        self.assertEqual(projects_info, expected)
 def test_clone_analysis_dir_copy_fastqs(self):
     """
     clone: copies an analysis directory

     Clones a mock analysis directory with copy_fastqs=True and
     checks the clone for the expected subdirectories, metadata and
     statistics files, the 'bcl2fastq' directory, a symlinked
     primary data entry, the project directories, and parameters
     that point into the clone.
     """
     # Build the source analysis directory
     source = MockAnalysisDirFactory.bcl2fastq2(
         "190116_M01234_0002_AXYZ123",
         platform="miseq",
         paired_end=True,
         no_lane_splitting=False,
         include_stats_files=True,
         top_dir=self.dirn)
     source.create()
     ap = AutoProcess(source.dirn)
     UpdateAnalysisDir(ap).add_processing_report()
     ap.add_directory("primary_data/190116_M01234_0002_AXYZ123")
     # Clone it to a location that doesn't exist yet
     clone_dir = os.path.join(self.dirn, "190116_M01234_0002_AXYZ123_copy")
     self.assertFalse(os.path.exists(clone_dir))
     clone(ap, clone_dir, copy_fastqs=True)
     self.assertTrue(os.path.isdir(clone_dir))
     # Subdirectories that must be present in the clone
     for name in ('logs', 'ScriptCode'):
         self.assertTrue(os.path.isdir(os.path.join(clone_dir, name)),
                         "Missing '%s'" % name)
     # Files that must be present in the clone
     expected_files = (
         'SampleSheet.orig.csv',
         'custom_SampleSheet.csv',
         'auto_process.info',
         'metadata.info',
         'statistics.info',
         'statistics_full.info',
         'per_lane_statistics.info',
         'per_lane_sample_stats.info',
         'processing_qc.html',
     )
     for name in expected_files:
         self.assertTrue(os.path.isfile(os.path.join(clone_dir, name)),
                         "Missing '%s'" % name)
     # The unaligned directory must be a directory in the clone
     self.assertTrue(os.path.isdir(os.path.join(clone_dir, 'bcl2fastq')))
     # The primary data entry must be a symbolic link
     self.assertTrue(
         os.path.islink(
             os.path.join(clone_dir, 'primary_data',
                          '190116_M01234_0002_AXYZ123')))
     # Project directories must be present
     for name in ('AB', 'CDE', 'undetermined'):
         self.assertTrue(os.path.isdir(os.path.join(clone_dir, name)),
                         "Missing '%s'" % name)
     # Stored parameters must refer to paths inside the clone
     cloned_params = AnalysisDirParameters(
         filen=os.path.join(clone_dir, 'auto_process.info'))
     self.assertEqual(cloned_params.sample_sheet,
                      os.path.join(clone_dir, "custom_SampleSheet.csv"))
     self.assertEqual(cloned_params.primary_data_dir,
                      os.path.join(clone_dir, "primary_data"))
 def test_publish_qc_subset_of_projects(self):
     """publish_qc: only publish subset of projects

     QC outputs are added to every project, but publish_qc() is
     called with the project pattern "AB*". The test expects the
     outputs for the first project returned by
     get_analysis_projects() to be published, and no QC report
     directory to be published for any of the remaining projects.
     """
     # Build the mock analysis directory
     mockdir = MockAnalysisDirFactory.bcl2fastq2(
         '160621_K00879_0087_000000000-AGEW9',
         'hiseq',
         metadata={ "run_number": 87,
                    "source": "local",
                    "instrument_datestamp": "160621" },
         top_dir=self.dirn)
     mockdir.create()
     ap = AutoProcess(mockdir.dirn)
     # Add the processing report
     UpdateAnalysisDir(ap).add_processing_report()
     # Split the projects: the first is expected to be published,
     # the rest are expected to be left out
     all_projects = ap.get_analysis_projects()
     unpublished = all_projects[1:]
     published = all_projects[0:1]
     # QC outputs are added to *all* projects, so any exclusion is
     # due to the pattern passed to publish_qc below
     for project in ap.get_analysis_projects():
         UpdateAnalysisProject(project).add_qc_outputs()
     # Create the publication area
     publication_dir = os.path.join(self.dirn,'QC')
     os.mkdir(publication_dir)
     # Publish only projects matching the pattern
     publish_qc(ap,location=publication_dir,
                projects="AB*")
     # Assemble the list of outputs that should have been published
     expected_outputs = ["index.html",
                         "processing_qc.html"]
     for project in published:
         qc_name = "qc_report.%s.%s" % (
             project.name, os.path.basename(ap.analysis_dir))
         expected_outputs.extend((
             # Standard QC outputs
             qc_name,
             "%s.zip" % qc_name,
             os.path.join(qc_name,"qc_report.html"),
             os.path.join(qc_name,"qc"),
             # MultiQC output
             "multiqc_report.%s.html" % project.name,
         ))
     for item in expected_outputs:
         f = os.path.join(publication_dir,
                          "160621_K00879_0087_000000000-AGEW9_analysis",
                          item)
         self.assertTrue(os.path.exists(f),"Missing %s" % f)
     # QC reports for the other projects must not have been copied
     for project in unpublished:
         unwanted = os.path.join(
             publication_dir,
             "160621_K00879_0087_000000000-AGEW9_analysis",
             "qc_report.%s.%s" % (project.name,
                                  os.path.basename(ap.analysis_dir)))
         self.assertFalse(os.path.exists(unwanted),
                          "%s exists in final dir, but shouldn't" %
                          project.name)