Example #1
0
def get_run_info(run_id, conf):
    """Load the RunInfo object of a run.

    Arguments:
        run_id: the run id
        conf: configuration dictionary

    Return:
        the object returned by RunInfo.parse() for the source given by
        get_runinfos_file(), or None when get_runinfos_file() returns None
    """

    runinfo_source = get_runinfos_file(run_id, conf)

    # Nothing to parse when no source was returned for this run
    return None if runinfo_source is None else RunInfo.parse(runinfo_source)
Example #2
0
def demux(run_id, conf):
    """Run the demultiplexing step for a run.

    The step checks that the required directories exist and that enough disk
    space is available, prepares the bcl2fastq samplesheet, launches bcl2fastq
    (through Docker or standalone, depending on the configuration), copies the
    logs and the samplesheet next to the FASTQ files, archives the statistics
    and the samplesheet, and finally sends a summary message.

    Arguments:
        run_id: The run id
        conf: configuration dictionary

    Return:
        True if the step succeeded, False otherwise (the failure is reported
        through error() or by the helper that failed)
    """

    start_time = time.time()
    common.log('INFO', 'Demux step: Starting', conf)

    reports_data_base_path = conf[REPORTS_DATA_PATH_KEY]
    reports_data_path = common.get_report_run_data_path(run_id, conf)

    samplesheet_filename = build_samplesheet_filename(run_id, conf)
    bcl2fastq_samplesheet_path = conf[TMP_PATH_KEY] + '/' + samplesheet_filename + '.csv'

    input_run_data_path = common.get_input_run_data_path(run_id, conf)

    if input_run_data_path is None:
        return False

    fastq_output_dir = conf[FASTQ_DATA_PATH_KEY] + '/' + run_id

    basecall_stats_prefix = 'basecall_stats_'
    basecall_stats_file = basecall_stats_prefix + run_id + '.tar.bz2'

    # Suffix shared by several error subjects below
    run_id_msg = ' for run ' + run_id

    # The Docker flag is read once and reused for the checks, the log and the launch
    use_docker = common.is_conf_value_equals_true(BCL2FASTQ_USE_DOCKER_KEY, conf)

    # Check if root input bcl data directory exists
    if not os.path.exists(input_run_data_path):
        error("Basecalling data directory does not exist",
              "Basecalling data directory does not exist: " + str(input_run_data_path), conf)
        # Without input data, the disk usage computation and the RunInfo.xml
        # parsing below cannot succeed, so stop here like the other checks
        return False

    # Check if root input fastq data directory exists
    if not common.is_dir_exists(FASTQ_DATA_PATH_KEY, conf):
        error("FASTQ data directory does not exist",
              "FASTQ data directory does not exist: " + conf[FASTQ_DATA_PATH_KEY], conf)
        return False

    # Check if bcl2fastq samplesheets path exists
    if not common.is_dir_exists(BCL2FASTQ_SAMPLESHEETS_PATH_KEY, conf):
        error("Bcl2fastq samplesheet directory does not exist",
              "Bcl2fastq samplesheet directory does not exist: " + conf[BCL2FASTQ_SAMPLESHEETS_PATH_KEY], conf)
        return False

    # Check if bcl2fastq basedir path exists (only needed when Docker is not used)
    if not use_docker and not common.is_dir_exists(BCL2FASTQ_PATH_KEY, conf):
        error("Bcl2fastq directory does not exist",
              "Bcl2fastq directory does not exist: " + conf[BCL2FASTQ_PATH_KEY], conf)
        return False

    # Check if temporary directory exists
    if not common.is_dir_exists(TMP_PATH_KEY, conf):
        error("Temporary directory does not exist",
              "Temporary directory does not exist: " + conf[TMP_PATH_KEY], conf)
        return False

    # Check if the base report directory exists
    if not os.path.exists(reports_data_base_path):
        error("Report directory does not exist",
              "Report directory does not exist: " + reports_data_base_path, conf)
        return False

    # Create the report directory of the run if it does not exist yet
    if not os.path.exists(reports_data_path):
        os.mkdir(reports_data_path)

    # Check if basecall stats archive exists
    if os.path.exists(reports_data_path + '/' + basecall_stats_file):
        error('Basecall stats archive already exists for run ' + run_id,
              'Basecall stats archive already exists for run ' + run_id + ': ' + basecall_stats_file, conf)
        return False

    # Check if the output directory already exists
    if os.path.exists(fastq_output_dir):
        error("FASTQ output directory already exists for run " + run_id,
              'FASTQ output directory already exists for run ' + run_id + ': ' + fastq_output_dir, conf)
        return False

    # Compute disk usage and disk free to check if enough disk space is available
    input_path_du = common.du(input_run_data_path)
    output_df = common.df(conf[FASTQ_DATA_PATH_KEY])
    du_factor = float(conf[DEMUX_SPACE_FACTOR_KEY])
    space_needed = input_path_du * du_factor

    common.log("WARNING", "Demux step: input disk usage: " + str(input_path_du), conf)
    common.log("WARNING", "Demux step: output disk free: " + str(output_df), conf)
    common.log("WARNING", "Demux step: space needed: " + str(space_needed), conf)

    common.log("CONFIG", "Bcl2fastq Docker mode: " + str(use_docker), conf)

    # Check if free space is available
    if output_df < space_needed:
        error("Not enough disk space to perform demultiplexing for run " + run_id,
              "Not enough disk space to perform demultiplexing for run " + run_id +
              '.\n%.2f Gb' % (space_needed / 1024 / 1024 / 1024) + ' is needed (factor x' + str(
                  du_factor) + ') on ' + fastq_output_dir + '.', conf)
        return False

    # Load RunInfo object
    run_info = RunInfo.parse(input_run_data_path + '/RunInfo.xml')

    # Load samplesheet
    samplesheet, original_samplesheet_path = load_samplesheet(run_id, input_run_data_path, samplesheet_filename, conf)

    if samplesheet is None:
        return False

    # Update samplesheet
    if not update_samplesheet(samplesheet, run_id, run_info.getFlowCellLaneCount(), conf):
        return False

    # Check samplesheet
    check_result, samplesheet_warnings = check_samplesheet(samplesheet, run_id, run_info.getFlowCell(), conf)
    if not check_result:
        return False

    # Get the number of mismatches
    nb_mismatch = get_bcl2fastq_mismatches(samplesheet, conf[BCL2FASTQ_MISMATCHES_KEY])

    # Write final samplesheet
    if not write_bcl2fastq_samplesheet(samplesheet, bcl2fastq_samplesheet_path, conf):
        return False

    # Run demultiplexing, either with the Docker image or with a local bcl2fastq
    if use_docker:
        demux_succeeded = demux_run_with_docker(run_id, input_run_data_path, fastq_output_dir,
                                                bcl2fastq_samplesheet_path, nb_mismatch, conf)
    else:
        demux_succeeded = demux_run_standalone(run_id, input_run_data_path, fastq_output_dir,
                                               bcl2fastq_samplesheet_path, nb_mismatch, conf)
    if not demux_succeeded:
        return False

    # Check if the output directory has been created
    if not os.path.exists(fastq_output_dir):
        error("Error while demultiplexing run " + run_id + ' on ' + common.get_instrument_name(run_id, conf),
              'Error while demultiplexing run ' + run_id + '.\n' +
              'The output directory of bcl2fastq has not been created: ' + fastq_output_dir, conf)
        return False

    # Check that the output path is a directory and not a regular file
    if os.path.isfile(fastq_output_dir):
        error("Error while demultiplexing run " + run_id + ' on ' + common.get_instrument_name(run_id, conf),
              'Error while demultiplexing run ' + run_id + '.\n' +
              'The output directory of bcl2fastq is a file instead of a directory: ' + fastq_output_dir, conf)
        return False

    # Copy bcl2fastq log to output directory
    cmd = 'cp ' + quote(conf[TMP_PATH_KEY]) + '/bcl2fastq_output_' + run_id + '.* ' + quote(fastq_output_dir)
    common.log("INFO", "exec: " + cmd, conf)
    if os.system(cmd) != 0:
        error("Error while copying bcl2fastq log to the output FASTQ directory" + run_id_msg,
              'Error while copying bcl2fastq log to the output FASTQ directory.\nCommand line:\n' + cmd, conf)
        return False

    # The output directory must be read only
    if not common.chmod_files_in_dir(fastq_output_dir, ".fastq", conf):
        # No shell command is involved here, so report the directory instead
        # of the unrelated "cp" command line left in cmd
        error("Error while setting the output FASTQ directory to read only" + run_id_msg,
              'Error while setting the output FASTQ directory to read only: ' + fastq_output_dir, conf)
        return False

    # Check that bcl2fastq produced FASTQ files
    if not check_if_output_fastq_files_exists(fastq_output_dir):
        error("Error with bcl2fastq execution for run " + run_id,
              "Error with bcl2fastq execution for run " + run_id + " no FASTQ file found in " + fastq_output_dir,
              conf)
        return False

    # Copy samplesheet to output directory
    cmd = 'cp -p ' + quote(bcl2fastq_samplesheet_path) + ' ' + quote(fastq_output_dir + '/SampleSheet.csv')
    common.log("INFO", "exec: " + cmd, conf)
    if os.system(cmd) != 0:
        error("Error while copying samplesheet file to FASTQ directory for run " + run_id,
              'Error while copying samplesheet file to FASTQ directory.\nCommand line:\n' + cmd, conf)
        return False

    # Create archives on demultiplexing statistics
    if not archive_demux_stat(run_id, fastq_output_dir, reports_data_path, basecall_stats_file,
                              basecall_stats_prefix, bcl2fastq_samplesheet_path, conf):
        return False

    # Archive samplesheet
    if not archive_samplesheet(run_id, original_samplesheet_path, bcl2fastq_samplesheet_path, conf):
        return False

    # Remove temporary samplesheet files
    if os.path.exists(bcl2fastq_samplesheet_path):
        os.remove(bcl2fastq_samplesheet_path)

    # Create index.html file
    common.create_html_index_file(conf, run_id, [Settings.HISEQ_STEP_KEY, Settings.DEMUX_STEP_KEY])

    df_in_bytes = common.df(fastq_output_dir)
    du_in_bytes = common.du(fastq_output_dir)

    # Float divisor: with integer operands "/" would truncate the GB values
    # reported with two decimals in the final message
    bytes_per_gb = 1024.0 * 1024 * 1024
    df = df_in_bytes / bytes_per_gb
    du = du_in_bytes / bytes_per_gb

    common.log("WARNING", "Demux step: output disk free after demux: " + str(df_in_bytes), conf)
    common.log("WARNING", "Demux step: space used by demux: " + str(du_in_bytes), conf)

    duration = time.time() - start_time

    # str() guards against a non-string mismatch count
    msg = 'Ending demultiplexing with ' + str(nb_mismatch) + ' mismatch(es) for run ' + run_id + '.' + \
          '\nJob finished at ' + common.time_to_human_readable(time.time()) + \
          ' without error in ' + common.duration_to_human_readable(duration) + '.\n\n' + \
          'FASTQ files for this run ' + \
          'can be found in the following directory:\n  ' + fastq_output_dir

    if samplesheet_warnings.size() > 0:
        msg += '\n\nSamplesheet warnings:'
        for warn in samplesheet_warnings:
            msg += "\n  - " + warn

    # Add path to report if reports.url exists
    if common.is_conf_key_exists(REPORTS_URL_KEY, conf):
        msg += '\n\nRun reports can be found at following location:\n  ' + conf[REPORTS_URL_KEY] + '/' + run_id

    msg += '\n\nFor this task %.2f GB has been used and %.2f GB still free.' % (du, df)

    common.send_msg('[Aozan] Ending demultiplexing for run ' + run_id + ' on ' +
                    common.get_instrument_name(run_id, conf), msg, False, conf)
    common.log('INFO', 'Demux step: successful in ' + common.duration_to_human_readable(duration), conf)

    return True
Example #3
0
def load_samplesheet(run_id, input_run_data_path, samplesheet_filename, conf):
    """ Load the samplesheet.

    Arguments:
        run id: The run id
        input_run_data_path: The input run data path
        samplesheet_filename: samplesheet filename
        conf: configuration dictionary

    Return:
        a Samplesheet object and the original path of the samplesheet
    """

    run_info_path = input_run_data_path + '/RunInfo.xml'

    if not os.path.isfile(run_info_path):
        error("no RunInfo.xml file found for run " + run_id,
              "No RunInfo.xml file found for run " + run_id + ': ' + run_info_path + '.\n', conf)
        return None, None

    run_info = RunInfo.parse(run_info_path)
    flow_cell_id = run_info.getFlowCell()

    common.log("INFO", "Flowcell id: " + flow_cell_id, conf)
    common.log("INFO", "Samplesheet format: " + str(conf[BCL2FASTQ_SAMPLESHEET_FORMAT_KEY]), conf)

    try:
        if common.is_conf_value_defined(BCL2FASTQ_SAMPLESHEET_FORMAT_KEY, 'xls', conf):
            return load_samplesheet_using_extension(conf,samplesheet_filename,'xls',input_run_data_path,run_id)

        elif common.is_conf_value_defined(BCL2FASTQ_SAMPLESHEET_FORMAT_KEY, 'csv', conf):
            return load_samplesheet_using_extension(conf,samplesheet_filename,'csv',input_run_data_path,run_id)

        elif common.is_conf_value_defined(BCL2FASTQ_SAMPLESHEET_FORMAT_KEY, 'xlsx', conf):
            return load_samplesheet_using_extension(conf,samplesheet_filename,'xlsx',input_run_data_path,run_id)

        elif common.is_conf_value_defined(BCL2FASTQ_SAMPLESHEET_FORMAT_KEY, 'command', conf):
            action_error_msg = 'Error while creating Bcl2fastq CSV samplesheet file'
            if not common.is_conf_key_exists(BCL2FASTQ_SAMPLESHEET_GENERATOR_COMMAND_KEY, conf):
                error(action_error_msg + ' for run ' + run_id, action_error_msg + ' the command is empty.', conf)
                return None, None

            input_samplesheet_generated_path = conf[TMP_PATH_KEY] + '/' + samplesheet_filename + 'generated.csv'

            cmd = conf[
                      BCL2FASTQ_SAMPLESHEET_GENERATOR_COMMAND_KEY] + ' ' + run_id + ' ' + quote(input_samplesheet_generated_path)
            common.log("INFO", "exec: " + cmd, conf)
            if os.system(cmd) != 0:
                error(action_error_msg + ' for run ' + run_id,
                      action_error_msg + '.\nCommand line:\n' + cmd, conf)

            if not os.path.exists(input_samplesheet_generated_path):
                error(action_error_msg + ' for run ' + run_id,
                      action_error_msg + ', the external command did not create Bcl2fastq CSV file:\n' + cmd, conf)
                return None, None

            # Load CSV samplesheet file
            samplesheet = SampleSheetCSVReader(input_samplesheet_generated_path).read()

            # Remove generated samplesheet
            os.unlink(input_samplesheet_generated_path)

            return samplesheet, None

        else:
            error('Error while creating Bcl2fastq CSV samplesheet file for run ' + run_id,
                  'No method to get Bcl2fastq samplesheet file has been defined. Please, set the ' +
                  '"bcl2fastq.samplesheet.format" property.\n',
                  conf)
            return None, None

    except AozanException, exp:
        print StringUtils.stackTraceToString(exp)

        error("Error reading samplesheet: " + samplesheet_filename, exp.getMessage(), conf)
        return None, None