def create_run_summary_reports(run_id, conf):
    """Archive the instrument log files and the report files of a run.

    Selected files and directories of the HiSeq run directory are copied to a
    temporary directory and packed into two tar.bz2 archives written in the
    report directory of the run:

      - hiseq_log_<run_id>.tar.bz2: InterOp directory, RunInfo.xml and the
        run parameters file
      - report_<run_id>.tar.bz2: report/status files, the set depends on the
        RTA major version. The uncompressed report_<run_id> directory is also
        moved into the report directory of the run.

    An archive is skipped (with a WARNING log) when none of its candidate
    files exists in the run directory.

    Arguments:
        run_id: the run id
        conf: configuration dictionary

    Returns:
        True if the step succeeded, False if a precondition is not met or if
        one of the shell commands failed
    """

    hiseq_data_path = hiseq_run.find_hiseq_run_path(run_id, conf)
    tmp_base_path = conf[TMP_PATH_KEY]
    reports_data_base_path = conf[REPORTS_DATA_PATH_KEY]

    source_path = hiseq_data_path + '/' + run_id
    reports_data_path = common.get_report_run_data_path(run_id, conf)
    report_prefix = 'report_'
    hiseq_log_prefix = 'hiseq_log_'
    report_archive_file = report_prefix + run_id + '.tar.bz2'
    hiseq_log_archive_file = hiseq_log_prefix + run_id + '.tar.bz2'

    # Temporary working directory used to build both archives
    tmp_path = tmp_base_path + '/' + run_id

    # Check if the base report directory exists
    if not os.path.exists(reports_data_base_path):
        error("Failed to create report archive: Report directory does not exist",
              "Failed to create report archive: Report directory does not exist: " + reports_data_base_path, conf)
        return False

    # Check if temporary directory exists
    if not os.path.exists(tmp_base_path):
        error("Failed to create report archive: Temporary directory does not exist",
              "Failed to create report archive: Temporary directory does not exist: " + tmp_base_path, conf)
        return False

    # Check if reports archive exists
    if os.path.exists(reports_data_path + '/' + report_archive_file):
        error("Failed to create report archive: Report archive already exists for run " + run_id,
              "Failed to create report archive: Report archive already exists for run " + run_id +
              " : " + report_archive_file, conf)
        return False

    # Check if hiseq log archive exists
    if os.path.exists(reports_data_path + '/' + hiseq_log_archive_file):
        error("Failed to create report archive: Hiseq log archive already exists for run " + run_id,
              "Failed to create report archive: Hiseq log archive already exists for run " + run_id +
              " : " + hiseq_log_archive_file, conf)
        return False

    # Create the report directory of the run if it does not exist
    if not os.path.exists(reports_data_path):
        os.mkdir(reports_data_path)

    # Create run tmp directory
    if os.path.exists(tmp_path):
        # The message reports the directory that is actually in the way
        # NOTE(review): the error is reported but the step goes on, as before;
        # confirm whether the function should return False here.
        error("Failed to create report archive: Temporary run data directory already exists for run " + run_id,
              "Failed to create report archive: Temporary run data directory already exists for run " + run_id +
              " : " + tmp_path, conf)
    else:
        os.mkdir(tmp_path)

    #
    # Save quality control data (hiseq log archive)
    #

    # Candidate files for the archive, only the existing ones are copied
    # (the name of the run parameters file depends on the Illumina software)
    files = ['InterOp', 'RunInfo.xml', 'runParameters.xml', 'RunParameters.xml']
    files_to_copy = list_existing_files(source_path, files)

    if not files_to_copy:
        common.log("WARNING", "Archive " + hiseq_log_archive_file + " not created: none file exists " + str(files) +
                   ' in ' + source_path, conf)
    else:
        # NOTE(review): the temporary directory is renamed by the 'mv', so the
        # final 'rm -rf' on tmp_path has nothing left to remove and the
        # renamed hiseq_log_<run_id> directory stays in the temporary
        # directory. Confirm whether it must be deleted here.
        cmd = 'cd ' + quote(source_path) + ' && ' + \
              'cp -rp ' + files_to_copy + ' ' + quote(tmp_path) + ' && ' + \
              'cd ' + quote(tmp_base_path) + ' && ' + \
              'mv ' + quote(run_id) + ' ' + quote(hiseq_log_prefix + run_id) + ' && ' + \
              'tar cjf ' + quote(reports_data_path + '/' + hiseq_log_archive_file) + ' ' + \
              quote(hiseq_log_prefix + run_id) + ' && ' + \
              'rm -rf ' + quote(tmp_path)

        common.log("INFO", "exec: " + cmd, conf)
        if os.system(cmd) != 0:
            error("Failed to create report archive: Error while saving Illumina quality control for run " + run_id,
                  "Failed to create report archive: Error saving Illumina quality control.\nCommand line:\n" + cmd,
                  conf)
            return False

    #
    # Save html reports (report archive)
    #

    # Start from an empty temporary directory
    if os.path.exists(tmp_path):
        cmd = 'rm -rf ' + quote(tmp_path)

        common.log("INFO", "exec: " + cmd, conf)
        if os.system(cmd) != 0:
            error("Failed to create report archive: Error while removing existing temporary directory",
                  "Failed to create report archive: Error while removing existing temporary directory.\n"
                  "Command line:\n" + cmd, conf)
            return False

    os.mkdir(tmp_path)

    # Candidate files for the archive, the set depends on the RTA major version
    if common.get_rta_major_version(run_id, conf) == 1:
        files = ['./Data/Status_Files', './Data/reports', './Data/Status.htm', './First_Base_Report.htm']
    else:
        files = ['./Config', './Recipe', './RTALogs', './RTAConfiguration.xml', './RunCompletionStatus.xml']

    files_to_copy = list_existing_files(source_path, files)
    if not files_to_copy:
        common.log("WARNING", "Archive " + report_archive_file + " not created: none file exists " + str(files) +
                   ' in ' + source_path, conf)
    else:
        # The last 'mv' keeps an uncompressed copy of the reports next to the archive
        cmd = 'cd ' + quote(source_path) + ' && ' + \
              'cp -rp ' + files_to_copy + ' ' + quote(tmp_path) + ' && ' + \
              'cd ' + quote(tmp_base_path) + ' && ' + \
              'mv ' + quote(run_id) + ' ' + quote(report_prefix + run_id) + ' && ' + \
              'tar cjf ' + quote(reports_data_path + '/' + report_archive_file) + ' ' + \
              quote(report_prefix + run_id) + ' && ' + \
              'mv ' + quote(report_prefix + run_id) + ' ' + quote(reports_data_path)

        common.log("INFO", "exec: " + cmd, conf)
        if os.system(cmd) != 0:
            error("Failed to create report archive: Error while saving Illumina HTML reports for run " + run_id,
                  "Failed to create report archive: Error saving Illumina HTML reports.\nCommand line:\n" + cmd,
                  conf)
            return False

    # Create index.html file
    common.create_html_index_file(conf, run_id, [HISEQ_STEP_KEY])

    # Set read only the report directory: it is named report_<run_id>
    # (see the last 'mv' above), so the run id must be part of the path
    common.chmod_files_in_dir(reports_data_path + '/' + report_prefix + run_id, None, conf)

    # Set read only archives files
    common.chmod(reports_data_path + '/' + report_archive_file, conf)
    common.chmod(reports_data_path + '/' + hiseq_log_archive_file, conf)

    return True
def demux(run_id, conf):
    """Run the demultiplexing step for a run.

    The step checks that the required directories exist and that enough disk
    space is available, loads, updates, checks and writes the samplesheet,
    runs bcl2fastq (standalone or with Docker, depending on the
    configuration), checks and protects the FASTQ output directory, archives
    the demultiplexing statistics and the samplesheet and finally sends a
    message that summarizes the step.

    Arguments:
        run_id: The run id
        conf: configuration dictionary

    Returns:
        True if the demultiplexing succeeded, False otherwise
    """

    start_time = time.time()
    common.log('INFO', 'Demux step: Starting', conf)

    # Suffix appended to some error subjects (was used without being defined)
    run_id_msg = ' for run ' + run_id

    reports_data_base_path = conf[REPORTS_DATA_PATH_KEY]
    reports_data_path = common.get_report_run_data_path(run_id, conf)

    samplesheet_filename = build_samplesheet_filename(run_id, conf)
    bcl2fastq_samplesheet_path = conf[TMP_PATH_KEY] + '/' + samplesheet_filename + '.csv'

    input_run_data_path = common.get_input_run_data_path(run_id, conf)

    if input_run_data_path is None:
        return False

    fastq_output_dir = conf[FASTQ_DATA_PATH_KEY] + '/' + run_id

    basecall_stats_prefix = 'basecall_stats_'
    basecall_stats_file = basecall_stats_prefix + run_id + '.tar.bz2'

    # Check if root input bcl data directory exists
    if not os.path.exists(input_run_data_path):
        error("Basecalling data directory does not exist",
              "Basecalling data directory does not exist: " + str(input_run_data_path), conf)
        # Nothing below can work without the input directory (disk usage
        # computation, RunInfo.xml parsing), so stop like the other checks do
        return False

    # Check if root input fastq data directory exists
    if not common.is_dir_exists(FASTQ_DATA_PATH_KEY, conf):
        error("FASTQ data directory does not exist",
              "FASTQ data directory does not exist: " + conf[FASTQ_DATA_PATH_KEY], conf)
        return False

    # Check if bcl2fastq samplesheets path exists
    if not common.is_dir_exists(BCL2FASTQ_SAMPLESHEETS_PATH_KEY, conf):
        error("Bcl2fastq samplesheet directory does not exist",
              "Bcl2fastq samplesheet directory does not exist: " + conf[BCL2FASTQ_SAMPLESHEETS_PATH_KEY], conf)
        return False

    # Check if bcl2fastq basedir path exists (not needed in Docker mode)
    if not common.is_conf_value_equals_true(BCL2FASTQ_USE_DOCKER_KEY, conf):
        if not common.is_dir_exists(BCL2FASTQ_PATH_KEY, conf):
            error("Bcl2fastq directory does not exist",
                  "Bcl2fastq directory does not exist: " + conf[BCL2FASTQ_PATH_KEY], conf)
            return False

    # Check if temporary directory exists
    if not common.is_dir_exists(TMP_PATH_KEY, conf):
        error("Temporary directory does not exist",
              "Temporary directory does not exist: " + conf[TMP_PATH_KEY], conf)
        return False

    # Check if the base report directory exists
    if not os.path.exists(reports_data_base_path):
        error("Report directory does not exist",
              "Report directory does not exist: " + reports_data_base_path, conf)
        return False

    # Create the report directory of the run if it does not exist
    if not os.path.exists(reports_data_path):
        os.mkdir(reports_data_path)

    # Check if basecall stats archive exists
    if os.path.exists(reports_data_path + '/' + basecall_stats_file):
        error('Basecall stats archive already exists for run ' + run_id,
              'Basecall stats archive already exists for run ' + run_id + ': ' + basecall_stats_file, conf)
        return False

    # Check if the output directory already exists
    if os.path.exists(fastq_output_dir):
        error("FASTQ output directory already exists for run " + run_id,
              'FASTQ output directory already exists for run ' + run_id + ': ' + fastq_output_dir, conf)
        return False

    # Compute disk usage and disk free to check if enough disk space is available
    input_path_du = common.du(input_run_data_path)
    output_df = common.df(conf[FASTQ_DATA_PATH_KEY])
    du_factor = float(conf[DEMUX_SPACE_FACTOR_KEY])
    space_needed = input_path_du * du_factor

    common.log("WARNING", "Demux step: input disk usage: " + str(input_path_du), conf)
    common.log("WARNING", "Demux step: output disk free: " + str(output_df), conf)
    common.log("WARNING", "Demux step: space needed: " + str(space_needed), conf)

    common.log("CONFIG", "Bcl2fastq Docker mode: " + str(
        common.is_conf_value_equals_true(Settings.BCL2FASTQ_USE_DOCKER_KEY, conf)), conf)

    # Check if free space is available
    if output_df < space_needed:
        error("Not enough disk space to perform demultiplexing for run " + run_id,
              "Not enough disk space to perform demultiplexing for run " + run_id +
              '.\n%.2f Gb' % (space_needed / 1024 / 1024 / 1024) +
              ' is needed (factor x' + str(du_factor) + ') on ' + fastq_output_dir + '.', conf)
        return False

    # Load RunInfo object
    run_info = RunInfo.parse(input_run_data_path + '/RunInfo.xml')

    # Load samplesheet
    samplesheet, original_samplesheet_path = load_samplesheet(run_id, input_run_data_path, samplesheet_filename,
                                                              conf)

    if samplesheet is None:
        return False

    # Update samplesheet
    if not update_samplesheet(samplesheet, run_id, run_info.getFlowCellLaneCount(), conf):
        return False

    # Check samplesheet
    check_result, samplesheet_warnings = check_samplesheet(samplesheet, run_id, run_info.getFlowCell(), conf)
    if not check_result:
        return False

    # Get the number of mismatches
    nb_mismatch = get_bcl2fastq_mismatches(samplesheet, conf[BCL2FASTQ_MISMATCHES_KEY])

    # Write final samplesheet
    if not write_bcl2fastq_samplesheet(samplesheet, bcl2fastq_samplesheet_path, conf):
        return False

    # Run demultiplexing
    if common.is_conf_value_equals_true(Settings.BCL2FASTQ_USE_DOCKER_KEY, conf):
        # With image docker
        if not demux_run_with_docker(run_id, input_run_data_path, fastq_output_dir, bcl2fastq_samplesheet_path,
                                     nb_mismatch, conf):
            return False
    else:
        if not demux_run_standalone(run_id, input_run_data_path, fastq_output_dir, bcl2fastq_samplesheet_path,
                                    nb_mismatch, conf):
            return False

    # Check if the output directory has been created
    if not os.path.exists(fastq_output_dir):
        error("Error while demultiplexing run " + run_id + ' on ' + common.get_instrument_name(run_id, conf),
              'Error while demultiplexing run ' + run_id + '.\n' +
              'The output directory of bcl2fastq has not been created: ' + fastq_output_dir, conf)
        return False

    # Check that the output path is a directory and not a regular file
    if os.path.isfile(fastq_output_dir):
        error("Error while demultiplexing run " + run_id + ' on ' + common.get_instrument_name(run_id, conf),
              'Error while demultiplexing run ' + run_id + '.\n' +
              'The output directory of bcl2fastq is a file instead of a directory: ' + fastq_output_dir, conf)
        return False

    # Copy bcl2fastq log to output directory
    # (the glob part is left unquoted on purpose so that the shell expands it)
    cmd = 'cp ' + quote(conf[TMP_PATH_KEY]) + '/bcl2fastq_output_' + run_id + '.* ' + quote(fastq_output_dir)
    common.log("INFO", "exec: " + cmd, conf)
    if os.system(cmd) != 0:
        error("Error while copying bcl2fastq log to the output FASTQ directory" + run_id_msg,
              'Error while copying bcl2fastq log to the output FASTQ directory.\nCommand line:\n' + cmd, conf)
        return False

    # The output directory must be read only
    if not common.chmod_files_in_dir(fastq_output_dir, ".fastq", conf):
        # Report the directory: 'cmd' still holds the unrelated 'cp' command
        error("Error while setting the output FASTQ directory to read only" + run_id_msg,
              'Error while setting the output FASTQ directory to read only: ' + fastq_output_dir, conf)
        return False

    if not check_if_output_fastq_files_exists(fastq_output_dir):
        error("Error with bcl2fastq execution for run " + run_id,
              "Error with bcl2fastq execution for run " + run_id + " no FASTQ file found in " + fastq_output_dir,
              conf)
        return False

    # Copy samplesheet to output directory
    cmd = 'cp -p ' + quote(bcl2fastq_samplesheet_path) + ' ' + quote(fastq_output_dir + '/SampleSheet.csv')
    common.log("INFO", "exec: " + cmd, conf)
    if os.system(cmd) != 0:
        error("Error while copying samplesheet file to FASTQ directory for run " + run_id,
              'Error while copying samplesheet file to FASTQ directory.\nCommand line:\n' + cmd, conf)
        return False

    # Create archives on demultiplexing statistics
    if not archive_demux_stat(run_id, fastq_output_dir, reports_data_path, basecall_stats_file,
                              basecall_stats_prefix, bcl2fastq_samplesheet_path, conf):
        return False

    # Archive samplesheet
    if not archive_samplesheet(run_id, original_samplesheet_path, bcl2fastq_samplesheet_path, conf):
        return False

    # Remove temporary samplesheet files
    if os.path.exists(bcl2fastq_samplesheet_path):
        os.remove(bcl2fastq_samplesheet_path)

    # Create index.html file
    common.create_html_index_file(conf, run_id, [Settings.HISEQ_STEP_KEY, Settings.DEMUX_STEP_KEY])

    # Convert to GB with a float division so that the values are not
    # truncated when the byte counts are integers (Python 2 / Jython)
    bytes_per_gb = 1024 * 1024 * 1024
    df_in_bytes = common.df(fastq_output_dir)
    du_in_bytes = common.du(fastq_output_dir)
    df = float(df_in_bytes) / bytes_per_gb
    du = float(du_in_bytes) / bytes_per_gb

    common.log("WARNING", "Demux step: output disk free after demux: " + str(df_in_bytes), conf)
    common.log("WARNING", "Demux step: space used by demux: " + str(du_in_bytes), conf)

    duration = time.time() - start_time

    # str() is required as the number of mismatches may not be a string
    msg = 'Ending demultiplexing with ' + str(nb_mismatch) + ' mismatch(es) for run ' + run_id + '.' + \
          '\nJob finished at ' + common.time_to_human_readable(time.time()) + \
          ' without error in ' + common.duration_to_human_readable(duration) + '.\n\n' + \
          'FASTQ files for this run ' + \
          'can be found in the following directory:\n  ' + fastq_output_dir

    # samplesheet_warnings exposes size(): presumably a Java list (Jython) - TODO confirm
    if samplesheet_warnings.size() > 0:
        msg += '\n\nSamplesheet warnings:'
        for warn in samplesheet_warnings:
            msg += "\n  - " + warn

    # Add path to report if reports.url exists
    if common.is_conf_key_exists(REPORTS_URL_KEY, conf):
        msg += '\n\nRun reports can be found at following location:\n  ' + conf[REPORTS_URL_KEY] + '/' + run_id

    msg += '\n\nFor this task %.2f GB has been used and %.2f GB still free.' % (du, df)

    common.send_msg('[Aozan] Ending demultiplexing for run ' + run_id + ' on ' +
                    common.get_instrument_name(run_id, conf), msg, False, conf)
    common.log('INFO', 'Demux step: successful in ' + common.duration_to_human_readable(duration), conf)

    return True
common.log("INFO", "exec: " + cmd, conf) if os.system(cmd) != 0: error("Error while saving the QC archive file for " + run_id, 'Error while saving the QC archive file.\nCommand line:\n' + cmd, conf) return False # Set read only basecall stats archives files common.chmod(reports_data_path + '/qc_' + run_id + '.tar.bz2', conf) # Check if the report has been generated if not os.path.exists(html_report_file): error("Error while computing QC report for run " + run_id + ".", "No HTML report generated", conf) return False # The output directory must be read only if not common.chmod_files_in_dir(qc_output_dir, None, conf): error("Error while setting the output QC directory to read only for run " + run_id, 'Error while setting the output QC directory to read only.\nCommand line:\n' + cmd, conf) return False # Create index.hml file sessions = [Settings.HISEQ_STEP_KEY, Settings.DEMUX_STEP_KEY, Settings.QC_STEP_KEY] common.create_html_index_file(conf, run_id, sessions) df_in_bytes = common.df(qc_output_dir) du_in_bytes = common.du(qc_output_dir) df = df_in_bytes / (1024 * 1024 * 1024) du = du_in_bytes / (1024 * 1024) common.log("WARNING", "QC step: output disk free after QC: " + str(df_in_bytes), conf) common.log("WARNING", "QC step: space used by QC: " + str(du_in_bytes), conf)