def create_run_summary_reports(run_id, conf):
    """ Copy main files and directory from hiseq run directory to save in report run data directory.
        Save data in two distinct directory on hiseq and on report, and tar.bz2 version

    Arguments:
        run_id: the run id
        conf: configuration dictionary
    """

    hiseq_data_path = hiseq_run.find_hiseq_run_path(run_id, conf)
    tmp_base_path = conf[TMP_PATH_KEY]
    reports_data_base_path = conf[REPORTS_DATA_PATH_KEY]

    source_path = hiseq_data_path + '/' + run_id
    reports_data_path = common.get_report_run_data_path(run_id, conf)
    report_prefix = 'report_'
    hiseq_log_prefix = 'hiseq_log_'
    report_archive_file = report_prefix + run_id + '.tar.bz2'
    hiseq_log_archive_file = hiseq_log_prefix + run_id + '.tar.bz2'

    # Save quality control data
    tmp_path = tmp_base_path + '/' + run_id

    # Check if reports_data_path exists
    if not os.path.exists(reports_data_base_path):
        error("Failed to create report archive: Report directory does not exist",
              "Failed to create report archive: Report directory does not exist: " + reports_data_base_path, conf)
        return False

    # Check if temporary directory exists
    if not os.path.exists(tmp_base_path):
        error("Failed to create report archive: Temporary directory does not exist",
              "Failed to create report archive: Temporary directory does not exist: " + tmp_base_path, conf)
        return False

    # Check if reports archive exists
    if os.path.exists(reports_data_path + '/' + report_archive_file):
        error("Failed to create report archive: Report archive already exists for run " + run_id,
              "Failed to create report archive: Report archive already exists for run " + run_id + " : " + report_archive_file, conf)
        return False

    # Check if hiseq log archive exists
    if os.path.exists(reports_data_path + '/' + hiseq_log_archive_file):
        error("Failed to create report archive: Hiseq log archive already exists for run " + run_id,
              "Failed to create report archive: Hiseq log archive already exists for run " + run_id + " : " + hiseq_log_archive_file, conf)
        return False

    # Create if not exist archive directory for the run
    if not os.path.exists(reports_data_path):
        os.mkdir(reports_data_path)

    # Create run tmp  directory
    if os.path.exists(tmp_path):
        error("Failed to create report archive: Temporary run data directory already exists for run " + run_id,
              "Failed to create report archive: Temporary run data directory already exists for run " + run_id + " : " + hiseq_log_archive_file, conf)
    else:
        os.mkdir(tmp_path)

    # Define set file to copy in report archive, check if exists (depend on parameters Illumina)
    files = ['InterOp', 'RunInfo.xml', 'runParameters.xml', 'RunParameters.xml']
    files_to_copy = list_existing_files(source_path, files)

    if len(files_to_copy) == 0:
        common.log("WARNING",
                   "Archive " + hiseq_log_archive_file + " not created: none file exists " + str(files) +
                   ' in ' + source_path, conf)
    else:
        cmd = 'cd ' + quote(source_path) + ' && ' + \
              'cp -rp ' + files_to_copy + ' ' + quote(tmp_path) + ' && ' + \
              'cd ' + quote(tmp_base_path) + ' && ' + \
              'mv ' + quote(run_id) + ' ' + quote(hiseq_log_prefix + run_id) + ' && ' + \
              'tar cjf ' + quote(reports_data_path + '/' + hiseq_log_archive_file) + ' ' + quote(hiseq_log_prefix + run_id) + ' && ' +\
              'rm -rf ' + quote(tmp_path)
        # + ' && rm -rf ' + hiseq_log_prefix + run_id

        common.log("INFO", "exec: " + cmd, conf)
        if os.system(cmd) != 0:
            error("Failed to create report archive: Error while saving Illumina quality control for run " + run_id,
                  "Failed to create report archive: Error saving Illumina quality control.\nCommand line:\n" + cmd, conf)
            return False

    # Save html reports
    if os.path.exists(tmp_path):
        cmd = 'rm -rf ' + quote(tmp_path)

        common.log("INFO", "exec: " + cmd, conf)
        if os.system(cmd) != 0:
            error("Failed to create report archive: Error while removing existing temporary directory",
                  "Failed to create report archive: Error while removing existing temporary directory.\nCommand line:\n" + cmd, conf)
            return False

    os.mkdir(tmp_path)

    # Define set file to copy in report archive, check if exists (depend on parameters Illumina)
    if common.get_rta_major_version(run_id, conf) == 1:
        files = ['./Data/Status_Files', './Data/reports', './Data/Status.htm', './First_Base_Report.htm']
    else:
        files = ['./Config', './Recipe', './RTALogs', './RTAConfiguration.xml', './RunCompletionStatus.xml']

    files_to_copy = list_existing_files(source_path, files)
    if len(files_to_copy) == 0:
        common.log("WARNING", "Archive " + report_archive_file + " not created: none file exists " + str(
            files) + ' in ' + source_path, conf)
    else:
        cmd = 'cd ' + quote(source_path) + ' && ' + \
              'cp -rp ' + files_to_copy + ' ' + quote(tmp_path) + ' && ' + \
              'cd ' + quote(tmp_base_path) + ' && ' + \
              'mv ' + quote(run_id) + ' ' + quote(report_prefix + run_id) + ' && ' + \
              'tar cjf ' + quote(reports_data_path + '/' + report_archive_file) + ' ' + quote(report_prefix + run_id) + ' && ' + \
              'mv ' + quote(report_prefix + run_id) + ' ' + quote(reports_data_path)

        common.log("INFO", "exec: " + cmd, conf)
        if os.system(cmd) != 0:
            error("Failed to create report archive: Error while saving Illumina HTML reports for run " + run_id,
                  "Failed to create report archive: Error saving Illumina HTML reports.\nCommand line:\n" + cmd, conf)
            return False

    # Create index.hml file
    common.create_html_index_file(conf, run_id, [HISEQ_STEP_KEY])

    # Set read only the report directory
    common.chmod_files_in_dir(reports_data_path + '/' + report_prefix, None, conf)

    # Set read only archives files
    common.chmod(reports_data_path + '/' + report_archive_file, conf)
    common.chmod(reports_data_path + '/' + hiseq_log_archive_file, conf)

    return True
Esempio n. 2
0
def demux(run_id, conf):
    """Add a processed run id to the list of the run ids.

    Arguments:
        run_id: The run id
        conf: configuration dictionary
    """

    start_time = time.time()
    common.log('INFO', 'Demux step: Starting', conf)

    reports_data_base_path = conf[REPORTS_DATA_PATH_KEY]
    reports_data_path = common.get_report_run_data_path(run_id, conf)

    samplesheet_filename = build_samplesheet_filename(run_id, conf)
    bcl2fastq_samplesheet_path = conf[TMP_PATH_KEY] + '/' + samplesheet_filename + '.csv'

    input_run_data_path = common.get_input_run_data_path(run_id, conf)

    if input_run_data_path is None:
        return False

    fastq_output_dir = conf[FASTQ_DATA_PATH_KEY] + '/' + run_id

    basecall_stats_prefix = 'basecall_stats_'
    basecall_stats_file = basecall_stats_prefix + run_id + '.tar.bz2'

    # Check if root input bcl data directory exists
    if not os.path.exists(input_run_data_path):
        error("Basecalling data directory does not exist",
              "Basecalling data directory does not exist: " + str(input_run_data_path), conf)
        # return False

    # Check if root input fastq data directory exists
    if not common.is_dir_exists(FASTQ_DATA_PATH_KEY, conf):
        error("FASTQ data directory does not exist",
              "FASTQ data directory does not exist: " + conf[FASTQ_DATA_PATH_KEY], conf)
        return False

    # Check if bcl2fastq samplesheets path exists
    if not common.is_dir_exists(BCL2FASTQ_SAMPLESHEETS_PATH_KEY, conf):
        error("Bcl2fastq samplesheet directory does not exist",
              "Bcl2fastq samplesheet directory does not exist: " + conf[BCL2FASTQ_SAMPLESHEETS_PATH_KEY], conf)
        return False

    # Check if bcl2fastq basedir path exists
    if not common.is_conf_value_equals_true(BCL2FASTQ_USE_DOCKER_KEY, conf):
        if not common.is_dir_exists(BCL2FASTQ_PATH_KEY, conf):
            error("Bcl2fastq directory does not exist",
                  "Bcl2fastq directory does not exist: " + conf[BCL2FASTQ_PATH_KEY], conf)
            return False

    # Check if temporary directory exists
    if not common.is_dir_exists(TMP_PATH_KEY, conf):
        error("Temporary directory does not exist",
              "Temporary directory does not exist: " + conf[TMP_PATH_KEY], conf)
        return False

    # Check if reports_data_path exists
    if not os.path.exists(reports_data_base_path):
        error("Report directory does not exist",
              "Report directory does not exist: " + reports_data_base_path, conf)
        return False

    # Create if not exist report directory for the run
    if not os.path.exists(reports_data_path):
        os.mkdir(reports_data_path)

    # Check if basecall stats archive exists
    if os.path.exists(reports_data_path + '/' + basecall_stats_file):
        error('Basecall stats archive already exists for run ' + run_id,
              'Basecall stats archive already exists for run ' + run_id + ': ' + basecall_stats_file, conf)
        return False

    # Check if the output directory already exists
    if os.path.exists(fastq_output_dir):
        error("FASTQ output directory already exists for run " + run_id,
              'FASTQ output directory already exists for run ' + run_id + ': ' + fastq_output_dir, conf)
        return False

    # Compute disk usage and disk free to check if enough disk space is available
    input_path_du = common.du(input_run_data_path)
    output_df = common.df(conf[FASTQ_DATA_PATH_KEY])
    du_factor = float(conf[DEMUX_SPACE_FACTOR_KEY])
    space_needed = input_path_du * du_factor

    common.log("WARNING", "Demux step: input disk usage: " + str(input_path_du), conf)
    common.log("WARNING", "Demux step: output disk free: " + str(output_df), conf)
    common.log("WARNING", "Demux step: space needed: " + str(space_needed), conf)

    common.log("CONFIG", "Bcl2fastq Docker mode: " + str(
        common.is_conf_value_equals_true(Settings.BCL2FASTQ_USE_DOCKER_KEY, conf)), conf)

    # Check if free space is available
    if output_df < space_needed:
        error("Not enough disk space to perform demultiplexing for run " + run_id,
              "Not enough disk space to perform demultiplexing for run " + run_id +
              '.\n%.2f Gb' % (space_needed / 1024 / 1024 / 1024) + ' is needed (factor x' + str(
                  du_factor) + ') on ' + fastq_output_dir + '.', conf)
        return False

    # Load RunInfo object
    run_info = RunInfo.parse(input_run_data_path + '/RunInfo.xml')

    # Load samplesheet
    samplesheet, original_samplesheet_path = load_samplesheet(run_id, input_run_data_path, samplesheet_filename, conf)

    if samplesheet is None:
        return False

    # Update samplesheet
    if not update_samplesheet(samplesheet, run_id, run_info.getFlowCellLaneCount(), conf):
        return False

    # Check samplesheet
    check_result, samplesheet_warnings = check_samplesheet(samplesheet, run_id, run_info.getFlowCell(), conf)
    if not check_result:
        return False

    # Get the number of mismatches
    nb_mismatch = get_bcl2fastq_mismatches(samplesheet, conf[BCL2FASTQ_MISMATCHES_KEY])

    # Write final samplesheet
    if not write_bcl2fastq_samplesheet(samplesheet, bcl2fastq_samplesheet_path, conf):
        return False

    # Run demultiplexing
    if common.is_conf_value_equals_true(Settings.BCL2FASTQ_USE_DOCKER_KEY, conf):
        # With image docker
        if not demux_run_with_docker(run_id, input_run_data_path, fastq_output_dir, bcl2fastq_samplesheet_path,
                                     nb_mismatch, conf):
            return False
    else:
        if not demux_run_standalone(run_id, input_run_data_path, fastq_output_dir, bcl2fastq_samplesheet_path,
                                    nb_mismatch, conf):
            return False

    # Check if the output directory has been created
    if not os.path.exists(fastq_output_dir):
        error("Error while demultiplexing run " + run_id + ' on ' + common.get_instrument_name(run_id, conf),
              'Error while demultiplexing run ' + run_id + '.\n' +
              'The output directory of bcl2fastq has been created: ' + fastq_output_dir, conf)
        return False

    # Check if the output directory has been created
    if os.path.isfile(fastq_output_dir):
        error("Error while demultiplexing run " + run_id + ' on ' + common.get_instrument_name(run_id, conf),
              'Error while demultiplexing run ' + run_id + '.\n' +
              'The output directory of bcl2fastq is a file instead of a directory: ' + fastq_output_dir, conf)
        return False

    # Copy bcl2fastq log to output directory
    cmd = 'cp ' + quote(conf[TMP_PATH_KEY]) + '/bcl2fastq_output_' + run_id + '.* ' + quote(fastq_output_dir)
    common.log("INFO", "exec: " + cmd, conf)
    if os.system(cmd) != 0:
        error("Error while copying bcl2fastq log to the output FASTQ directory" + run_id_msg,
              'Error while copying bcl2fastq log to the output FASTQ directory.\nCommand line:\n' + cmd, conf)
        return False

    # The output directory must be read only
    if not common.chmod_files_in_dir(fastq_output_dir, ".fastq", conf):
        error("Error while setting the output FASTQ directory to read only" + run_id_msg,
              'Error while setting the output FASTQ directory to read only.\nCommand line:\n' + cmd, conf)
        return False


    if not check_if_output_fastq_files_exists(fastq_output_dir):
        error("Error with bcl2fastq execution for run " + run_id,
              "Error with bcl2fastq execution for run " + run_id + " no FASTQ file found in " + fastq_output_dir,
              conf)
        return False

    # Copy samplesheet to output directory
    cmd = 'cp -p ' + quote(bcl2fastq_samplesheet_path) + ' ' + quote(fastq_output_dir + '/SampleSheet.csv')
    common.log("INFO", "exec: " + cmd, conf)
    if os.system(cmd) != 0:
        error("Error while copying samplesheet file to FASTQ directory for run " + run_id,
              'Error while copying samplesheet file to FASTQ directory.\nCommand line:\n' + cmd, conf)
        return False

    # Create archives on demultiplexing statistics
    if not archive_demux_stat(run_id, fastq_output_dir, reports_data_path, basecall_stats_file,
                              basecall_stats_prefix, bcl2fastq_samplesheet_path, conf):
        return False

    # Archive samplesheet
    if not archive_samplesheet(run_id, original_samplesheet_path, bcl2fastq_samplesheet_path, conf):
        return False

    # Remove temporary samplesheet files
    if os.path.exists(bcl2fastq_samplesheet_path):
        os.remove(bcl2fastq_samplesheet_path)

    # Create index.hml file
    common.create_html_index_file(conf, run_id, [Settings.HISEQ_STEP_KEY, Settings.DEMUX_STEP_KEY])

    df_in_bytes = common.df(fastq_output_dir)
    du_in_bytes = common.du(fastq_output_dir)
    df = df_in_bytes / (1024 * 1024 * 1024)
    du = du_in_bytes / (1024 * 1024 * 1024)

    common.log("WARNING", "Demux step: output disk free after demux: " + str(df_in_bytes), conf)
    common.log("WARNING", "Demux step: space used by demux: " + str(du_in_bytes), conf)

    duration = time.time() - start_time

    msg = 'Ending demultiplexing with ' + nb_mismatch + ' mismatch(es) for run ' + run_id + '.' + \
          '\nJob finished at ' + common.time_to_human_readable(time.time()) + \
          ' without error in ' + common.duration_to_human_readable(duration) + '.\n\n' + \
          'FASTQ files for this run ' + \
          'can be found in the following directory:\n  ' + fastq_output_dir

    if samplesheet_warnings.size() > 0:
        msg += '\n\nSamplesheet warnings:'
        for warn in samplesheet_warnings:
            msg += "\n  - " + warn

    # Add path to report if reports.url exists
    if common.is_conf_key_exists(REPORTS_URL_KEY, conf):
        msg += '\n\nRun reports can be found at following location:\n  ' + conf[REPORTS_URL_KEY] + '/' + run_id

    msg += '\n\nFor this task %.2f GB has been used and %.2f GB still free.' % (du, df)

    common.send_msg('[Aozan] Ending demultiplexing for run ' + run_id + ' on ' +
                    common.get_instrument_name(run_id, conf), msg, False, conf)
    common.log('INFO', 'Demux step: successful in ' + common.duration_to_human_readable(duration), conf)

    return True
Esempio n. 3
0
    common.log("INFO", "exec: " + cmd, conf)
    if os.system(cmd) != 0:
        error("Error while saving the QC archive file for " + run_id,
              'Error while saving the  QC archive file.\nCommand line:\n' + cmd, conf)
        return False

    # Set read only basecall stats archives files
    common.chmod(reports_data_path + '/qc_' + run_id + '.tar.bz2', conf)

    # Check if the report has been generated
    if not os.path.exists(html_report_file):
        error("Error while computing QC report for run " + run_id + ".", "No HTML report generated", conf)
        return False

    # The output directory must be read only
    if not common.chmod_files_in_dir(qc_output_dir, None, conf):
        error("Error while setting the output QC directory to read only for run " + run_id,
              'Error while setting the output QC directory to read only.\nCommand line:\n' + cmd, conf)
        return False

    # Create index.hml file
    sessions = [Settings.HISEQ_STEP_KEY, Settings.DEMUX_STEP_KEY, Settings.QC_STEP_KEY]
    common.create_html_index_file(conf, run_id, sessions)

    df_in_bytes = common.df(qc_output_dir)
    du_in_bytes = common.du(qc_output_dir)
    df = df_in_bytes / (1024 * 1024 * 1024)
    du = du_in_bytes / (1024 * 1024)

    common.log("WARNING", "QC step: output disk free after QC: " + str(df_in_bytes), conf)
    common.log("WARNING", "QC step: space used by QC: " + str(du_in_bytes), conf)