Python find_hiseq_run_path Beispiele, hiseq_run.find_hiseq_run_path Python Beispiele

Beispiel #1

0

Datei anzeigen

Datei: detection_end_run.py Projekt: GenomicParisCentre/aozan

def send_failed_run_message(run_id, secs, conf):
    """Send a mail to inform about a failed run.

    The mail gives the last modification time of the run's RTAComplete.txt
    file, the location of the run data and the disk space used by the run
    and still free.

    Nothing is sent when the run directory cannot be located.

    Arguments:
        run_id: the run id
        secs: not used by this function
        conf: configuration dictionary
    """

    run_path = hiseq_run.find_hiseq_run_path(run_id, conf)
    # Other callers of find_hiseq_run_path() test its result against False;
    # without this guard the string concatenations below raise a TypeError.
    if run_path is False:
        return

    run_dir = run_path + '/' + run_id
    file_to_test = run_dir + '/RTAComplete.txt'
    last = os.stat(file_to_test).st_mtime

    # Convert the values returned by common.df()/common.du() to GB
    df = common.df(run_path) / (1024 * 1024 * 1024)
    du = common.du(run_dir) / (1024 * 1024 * 1024)

    common.send_msg('[Aozan] Failed run ' + run_id + ' on ' + common.get_instrument_name(run_id, conf),
                    'A run (' + run_id + ') has failed on ' + common.get_instrument_name(run_id, conf) +
                    ' at ' + common.time_to_human_readable(last) + '.\n' + 'Data for this run can be found at: ' +
                    run_path + '\n\nFor this task %.2f GB has been used and %.2f GB still free.' % (du, df),
                    False, conf)

Beispiel #2

0

Datei anzeigen

Datei: detection_end_run.py Projekt: GenomicParisCentre/aozan

def send_mail_if_recent_run(run_id, secs, conf):
    """Notify by email that a run has just finished.

    No mail is sent when the run directory cannot be located or when
    hiseq_run.check_end_run_since() does not return a positive value.

    Arguments:
        run_id: run id
        secs: maximum delay since the end of the run
        conf: configuration object
    """

    run_path = hiseq_run.find_hiseq_run_path(run_id, conf)
    if run_path is False:
        return

    last = hiseq_run.check_end_run_since(run_id, secs, conf)
    if not (last > 0):
        return

    # Disk figures are converted to GB before being reported
    bytes_per_gb = 1024 * 1024 * 1024
    free_gb = common.df(run_path) / bytes_per_gb
    used_gb = common.du(run_path + '/' + run_id) / bytes_per_gb

    subject = '[Aozan] Ending run ' + run_id + ' on ' + common.get_instrument_name(run_id, conf)

    body = 'A new run (' + run_id + ') is finished on ' + common.get_instrument_name(run_id, conf)
    body += ' at ' + common.time_to_human_readable(last) + '.\n'
    body += 'Data for this run can be found at: ' + run_path
    body += '\n\nFor this task %.2f GB has been used and %.2f GB still free.' % (used_gb, free_gb)

    common.send_msg(subject, body, False, conf)

Beispiel #3

0

Datei anzeigen

Datei: detection_new_run.py Projekt: GenomicParisCentre/aozan

def send_report(run_id, conf):
    """Send a mail with the first base report.

    The mail describes the run (sequencer, lanes, reads, indexes, cycles and
    an estimated run type). The first base report file is attached when it is
    readable; otherwise a plain message with the run parameters is sent.

    Arguments:
        run_id: the run id
        conf: configuration dictionary

    Returns:
        False when hiseq_run.get_run_info() returns None, True once a
        message has been sent.
    """

    #
    # Retrieve features the current run in RunInfos.xml file
    #

    run_info = hiseq_run.get_run_info(run_id, conf)

    if run_info is None:
        return False

    # TODO ?? add check sample-sheet if demux step enable
    # add warning in report if useful

    error_cycles_per_read_not_indexes_count = False
    reads_indexed_count = 0
    reads_not_indexed_count = 0
    cycles_count = 0
    # 0 means "no non-indexed read seen yet"
    cycles_per_read_not_indexed = 0

    for read in run_info.getReads():
        cycles_count += read.getNumberCycles()
        if read.isIndexedRead():
            reads_indexed_count += 1
            continue

        reads_not_indexed_count += 1
        if cycles_per_read_not_indexed == 0:
            cycles_per_read_not_indexed = read.getNumberCycles()

        # Check same cycles count for each reads not indexed. The flag is
        # sticky: once a mismatch is seen, a later matching read must not
        # clear it.
        if cycles_per_read_not_indexed != read.getNumberCycles():
            error_cycles_per_read_not_indexes_count = True

    # "<n> index" or "<n> indexes", shared by the run type and the description
    indexes_text = str(reads_indexed_count) + " index"
    if reads_indexed_count > 1:
        indexes_text += "es"

    # Identification type run according to data in RunInfos.xml : SR or PE
    if reads_not_indexed_count == 1:
        type_run_estimated = "SR-" + str(cycles_per_read_not_indexed) + " with " + indexes_text
    elif reads_not_indexed_count == 2:
        type_run_estimated = "PE-" + str(cycles_per_read_not_indexed) + " with " + indexes_text
    else:
        # Single closing parenthesis, placed after the optional plural suffix
        type_run_estimated = "Undetermined run type (" + str(reads_not_indexed_count) + " reads with " + \
                             indexes_text + ")"

    description_run = "Informations about this run:\n"
    description_run += "\t- Sequencer: " + common.get_instrument_name(run_id, conf) + ".\n"
    description_run += "\t- " + str(run_info.getFlowCellLaneCount()) + " lanes with " + str(
        run_info.alignToPhix.size()) + " aligned to Phix.\n"
    description_run += "\t- " + str(reads_not_indexed_count) + " read"
    if reads_not_indexed_count > 1:
        description_run += "s"
    description_run += " and " + indexes_text + ".\n"

    if error_cycles_per_read_not_indexes_count or cycles_per_read_not_indexed == 0:
        description_run += "\t- ERROR : cycles count per read different between reads (" + str(
            cycles_count) + " total cycles).\n"
    else:
        description_run += "\t- " + str(cycles_per_read_not_indexed) + " cycles per read (" + str(
            cycles_count) + " total cycles).\n"

    description_run += "\t- Estimated run type: " + type_run_estimated + ".\n"

    # Other callers of find_hiseq_run_path() test its result against False;
    # in that case there is no attachment to look for.
    run_path = hiseq_run.find_hiseq_run_path(run_id, conf)
    attachment_file = None
    if run_path is not False:
        attachment_file = str(run_path) + '/' + run_id + '/' + common.FIRST_BASE_REPORT_FILE

    # If the First base report file exists, send it by email
    if attachment_file is not None and common.is_file_readable(attachment_file):

        message = 'You will find attached to this message the first base report for the run ' + \
                  run_id + '.\n\n' + description_run
        common.send_msg_with_attachment('[Aozan] First base report for the run ' + type_run_estimated + '  ' + run_id +
                                        ' on ' + common.get_instrument_name(run_id, conf),
                                        message, attachment_file, False, conf)
    else:
        # Without attachment file
        message = 'You will find below the parameters of the run ' + run_id + '.\n\n' + description_run
        common.send_msg('[Aozan] New run ' + type_run_estimated + ' ' + run_id + ' on ' +
                        common.get_instrument_name(run_id, conf), message,
                        False, conf)

    return True

Beispiel #4

0

Datei anzeigen

Datei: detection_end_run.py Projekt: GenomicParisCentre/aozan

def create_run_summary_reports(run_id, conf):
    """Copy main files and directories of a run to the report run data directory.

    Two tar.bz2 archives are built through the temporary directory and
    written to the report run data directory:
      - 'hiseq_log_<run_id>.tar.bz2' with InterOp, RunInfo.xml and the run
        parameters file;
      - 'report_<run_id>.tar.bz2' with a file set that depends on the RTA
        major version. The uncompressed 'report_<run_id>' directory is also
        moved to the report run data directory.
    An archive is skipped, with a warning in the log, when none of its
    candidate files exists.

    Arguments:
        run_id: the run id
        conf: configuration dictionary

    Returns:
        False as soon as a precondition check or a shell command fails (the
        failure is reported through error()), True otherwise.
    """

    hiseq_data_path = hiseq_run.find_hiseq_run_path(run_id, conf)

    # Other callers of find_hiseq_run_path() test its result against False;
    # without this guard the path concatenation below raises a TypeError.
    if hiseq_data_path is False:
        error("Failed to create report archive: Sequencer run data not found",
              "Failed to create report archive: Sequencer data for run " + run_id +
              " not found in sequencer directories", conf)
        return False

    tmp_base_path = conf[TMP_PATH_KEY]
    reports_data_base_path = conf[REPORTS_DATA_PATH_KEY]

    source_path = hiseq_data_path + '/' + run_id
    reports_data_path = common.get_report_run_data_path(run_id, conf)
    report_prefix = 'report_'
    hiseq_log_prefix = 'hiseq_log_'
    report_archive_file = report_prefix + run_id + '.tar.bz2'
    hiseq_log_archive_file = hiseq_log_prefix + run_id + '.tar.bz2'

    # Save quality control data
    tmp_path = tmp_base_path + '/' + run_id

    # Check if reports_data_path exists
    if not os.path.exists(reports_data_base_path):
        error("Failed to create report archive: Report directory does not exist",
              "Failed to create report archive: Report directory does not exist: " + reports_data_base_path, conf)
        return False

    # Check if temporary directory exists
    if not os.path.exists(tmp_base_path):
        error("Failed to create report archive: Temporary directory does not exist",
              "Failed to create report archive: Temporary directory does not exist: " + tmp_base_path, conf)
        return False

    # Check if reports archive exists
    if os.path.exists(reports_data_path + '/' + report_archive_file):
        error("Failed to create report archive: Report archive already exists for run " + run_id,
              "Failed to create report archive: Report archive already exists for run " + run_id + " : " + report_archive_file, conf)
        return False

    # Check if hiseq log archive exists
    if os.path.exists(reports_data_path + '/' + hiseq_log_archive_file):
        error("Failed to create report archive: Hiseq log archive already exists for run " + run_id,
              "Failed to create report archive: Hiseq log archive already exists for run " + run_id + " : " + hiseq_log_archive_file, conf)
        return False

    # Create if not exist archive directory for the run
    if not os.path.exists(reports_data_path):
        os.mkdir(reports_data_path)

    # Create run tmp directory. A pre-existing one is an error like the other
    # failed checks above: stop here instead of archiving stale content.
    if os.path.exists(tmp_path):
        error("Failed to create report archive: Temporary run data directory already exists for run " + run_id,
              "Failed to create report archive: Temporary run data directory already exists for run " + run_id + " : " + tmp_path, conf)
        return False

    os.mkdir(tmp_path)

    # Define set file to copy in report archive, check if exists (depend on parameters Illumina)
    files = ['InterOp', 'RunInfo.xml', 'runParameters.xml', 'RunParameters.xml']
    files_to_copy = list_existing_files(source_path, files)

    if len(files_to_copy) == 0:
        common.log("WARNING",
                   "Archive " + hiseq_log_archive_file + " not created: none file exists " + str(files) +
                   ' in ' + source_path, conf)
    else:
        # NOTE(review): after the 'mv', tmp_path no longer exists, so the
        # final 'rm -rf' removes nothing and the renamed
        # '<tmp>/hiseq_log_<run_id>' directory is left behind -- confirm
        # whether that is intended.
        cmd = 'cd ' + quote(source_path) + ' && ' + \
              'cp -rp ' + files_to_copy + ' ' + quote(tmp_path) + ' && ' + \
              'cd ' + quote(tmp_base_path) + ' && ' + \
              'mv ' + quote(run_id) + ' ' + quote(hiseq_log_prefix + run_id) + ' && ' + \
              'tar cjf ' + quote(reports_data_path + '/' + hiseq_log_archive_file) + ' ' + quote(hiseq_log_prefix + run_id) + ' && ' +\
              'rm -rf ' + quote(tmp_path)

        common.log("INFO", "exec: " + cmd, conf)
        if os.system(cmd) != 0:
            error("Failed to create report archive: Error while saving Illumina quality control for run " + run_id,
                  "Failed to create report archive: Error saving Illumina quality control.\nCommand line:\n" + cmd, conf)
            return False

    # Save html reports
    # The temporary directory still exists when the previous archive was skipped
    if os.path.exists(tmp_path):
        cmd = 'rm -rf ' + quote(tmp_path)

        common.log("INFO", "exec: " + cmd, conf)
        if os.system(cmd) != 0:
            error("Failed to create report archive: Error while removing existing temporary directory",
                  "Failed to create report archive: Error while removing existing temporary directory.\nCommand line:\n" + cmd, conf)
            return False

    os.mkdir(tmp_path)

    # Define set file to copy in report archive, check if exists (depend on parameters Illumina)
    if common.get_rta_major_version(run_id, conf) == 1:
        files = ['./Data/Status_Files', './Data/reports', './Data/Status.htm', './First_Base_Report.htm']
    else:
        files = ['./Config', './Recipe', './RTALogs', './RTAConfiguration.xml', './RunCompletionStatus.xml']

    files_to_copy = list_existing_files(source_path, files)
    if len(files_to_copy) == 0:
        common.log("WARNING", "Archive " + report_archive_file + " not created: none file exists " + str(
            files) + ' in ' + source_path, conf)
    else:
        cmd = 'cd ' + quote(source_path) + ' && ' + \
              'cp -rp ' + files_to_copy + ' ' + quote(tmp_path) + ' && ' + \
              'cd ' + quote(tmp_base_path) + ' && ' + \
              'mv ' + quote(run_id) + ' ' + quote(report_prefix + run_id) + ' && ' + \
              'tar cjf ' + quote(reports_data_path + '/' + report_archive_file) + ' ' + quote(report_prefix + run_id) + ' && ' + \
              'mv ' + quote(report_prefix + run_id) + ' ' + quote(reports_data_path)

        common.log("INFO", "exec: " + cmd, conf)
        if os.system(cmd) != 0:
            error("Failed to create report archive: Error while saving Illumina HTML reports for run " + run_id,
                  "Failed to create report archive: Error saving Illumina HTML reports.\nCommand line:\n" + cmd, conf)
            return False

    # Create index.html file
    common.create_html_index_file(conf, run_id, [HISEQ_STEP_KEY])

    # Set read only the report directory: the directory moved above is
    # 'report_<run_id>', so the run id must be part of the path.
    common.chmod_files_in_dir(reports_data_path + '/' + report_prefix + run_id, None, conf)

    # Set read only archives files
    common.chmod(reports_data_path + '/' + report_archive_file, conf)
    common.chmod(reports_data_path + '/' + hiseq_log_archive_file, conf)

    return True

Beispiel #5

0

Datei anzeigen

Datei: sync_run.py Projekt: GenomicParisCentre/aozan

def partial_sync(run_id, last_sync, conf):
    """Partial synchronization of a run.

    Copies the run directory with rsync to '<bcl data path>/<run_id>.tmp'.
    For the last synchronization, the whole directory is copied minus the
    excluded patterns. Otherwise only files older than the configured
    minimum age are copied, listed with find in a manifest file.

    Arguments:
        run_id: the run id
        last_sync: last synchronization
        conf: configuration dictionary

    Returns:
        False as soon as a precondition check or a shell command fails (the
        failure is reported through error()), True otherwise.
    """

    hiseq_data_path = hiseq_run.find_hiseq_run_path(run_id, conf)
    bcl_data_path = conf[BCL_DATA_PATH_KEY]
    final_output_path = bcl_data_path + '/' + run_id

    # Check if the run has been found in the sequencer directories
    if hiseq_data_path is False:
        error('Sequencer run data not found',
              'Sequencer data for run ' + run_id + ' not found in sequencer directories (' +
              conf[HISEQ_DATA_PATH_KEY] + ')', conf)
        return False

    # Check if hiseq_data_path exists
    if not os.path.exists(hiseq_data_path):
        error("Sequencer directory does not exist", "Sequencer directory does not exist: " + hiseq_data_path, conf)
        return False

    # Check if bcl_data_path exists
    if not os.path.exists(bcl_data_path):
        error("Basecalling directory does not exist", "Basecalling directory does not exist: " + bcl_data_path, conf)
        return False

    # Check if final output path already exists
    if os.path.exists(final_output_path):
        error("Basecalling directory for run " + run_id + " already exists",
              "Basecalling directory for run " + run_id + " already exists: " + final_output_path, conf)
        return False

    input_path = hiseq_data_path + '/' + run_id
    output_path = bcl_data_path + '/' + run_id + '.tmp'

    # Create output path for run if not exist
    if not os.path.exists(output_path):
        os.mkdir(output_path)

    input_path_du = common.du(input_path)
    output_path_du = common.du(output_path)
    output_path_df = common.df(bcl_data_path)
    du_factor = float(conf[SYNC_SPACE_FACTOR_KEY])
    # What is already in the output directory does not need to be copied again
    space_needed = input_path_du * du_factor - output_path_du

    common.log("WARNING", "Sync step: input disk usage: " + str(input_path_du), conf)
    common.log("WARNING", "Sync step: output disk free: " + str(output_path_df), conf)
    common.log("WARNING", "Sync step: space needed: " + str(space_needed), conf)

    # Check if enough free space is available on the output volume
    if output_path_df < space_needed:
        error("Not enough disk space to perform synchronization for run " + run_id,
              "Not enough disk space to perform synchronization for run " + run_id +
              '.\n%.2f Gb' % (space_needed / 1024 / 1024 / 1024) + ' is needed (factor x' + str(
                  du_factor) + ') on ' + bcl_data_path + '.', conf)
        return False

    # Extract exclude file from sequencer type and configuration
    exclude_files = get_exclude_files_list(run_id, conf)

    rsync_manifest_path = conf[TMP_PATH_KEY] + '/' + run_id + '.rsync.manifest'
    rsync_params = ''

    if last_sync:
        for exclude_file in exclude_files:
            rsync_params += " --exclude '" + exclude_file + "' "
    else:
        # Exclude files that will be rewritten several times during the run
        exclude_files.extend(['*.bin', '*.txt', '*.xml'])
        cmd = 'cd ' + quote(input_path) + ' && find . -type f -mmin +' + conf[SYNC_CONTINUOUS_SYNC_MIN_AGE_FILES_KEY]
        for exclude_file in exclude_files:
            cmd += " -not -name '" + exclude_file + "' "
        cmd += ' > ' + quote(rsync_manifest_path)
        common.log("INFO", "exec: " + cmd, conf)
        if os.system(cmd) != 0:
            # The failing command here is find, not rsync
            error("Error while executing find for run " + run_id, 'Error while executing find.\nCommand line:\n' + cmd,
                  conf)
            return False
        rsync_params = '--files-from=' + quote(rsync_manifest_path)

    # Copy data from hiseq path to bcl path
    cmd = 'rsync  -a --no-owner --no-group ' + rsync_params + ' ' + quote(input_path + '/') + ' ' + quote(output_path)
    common.log("INFO", "exec: " + cmd, conf)
    if os.system(cmd) != 0:
        error("Error while executing rsync for run " + run_id, 'Error while executing rsync.\nCommand line:\n' + cmd,
              conf)
        return False

    # The manifest only exists when it has been generated by find above
    if not last_sync:
        os.remove(rsync_manifest_path)

    return True