コード例 #1
0
ファイル: Runs.py プロジェクト: eriksjolund/TACA
 def post_qc(self, qc_file, status, log_file, rcp):
     """ Checks whether a run has passed the final QC.

         Appends the verdict (PASSED/FAILED) to ``qc_file`` unless the run is
         already listed there, and mails ``rcp`` when the run failed QC.

         :param str qc_file: Path to file with information about transferred runs
         :param bool status: True if the run passed QC, False otherwise
         :param str log_file: Path to the log file (referenced in the mail)
         :param str rcp: Mail recipient(s)
     """
     already_seen = False
     runname = self.id
     # Short flowcell id: <date>_<flowcell>, e.g. "170214_ABC123" - TODO confirm
     shortrun = runname.split('_')[0] + '_' + runname.split('_')[-1]
     # NOTE(review): 'ab+' (binary append) only accepts str writes on Python 2;
     # on Python 3 this would need text mode - presumably a Python 2 codebase.
     with open(qc_file, 'ab+') as f:
         f.seek(0)
         for row in f:
             # Rows have two columns: run and transfer date
             if row.split('\t')[0] == runname:
                 already_seen = True
                 break  # one match is enough, no need to scan the rest
         if not already_seen:
             if status:
                 f.write("{}\tPASSED\n".format(runname))
             else:
                 sj = "{} failed QC".format(runname)
                 cnt = """The run {run} has failed qc and will NOT be transfered to Nestor.

                     The run might be available at : https://genomics-status.scilifelab.se/flowcells/{shortfc}

                     To read the logs, run the following command on {server}
                     grep -A30 "Checking run {run}" {log}

                     To force the transfer :
                     taca analysis transfer {rundir} """.format(run=runname, shortfc=shortrun, log=log_file, server=os.uname()[1], rundir=self.id)
                 misc.send_mail(sj, cnt, rcp)
                 # BUGFIX: record the same name the duplicate check above
                 # compares against (runname), so FAILED entries are also
                 # deduplicated; the old code wrote os.path.basename(self.id).
                 f.write("{}\tFAILED\n".format(runname))
コード例 #2
0
ファイル: cleanup.py プロジェクト: SciLifeLab/TACA
def cleanup_processing(seconds):
    """Cleanup runs in the processing server.

    Deletes runs from the archiving directories whose finished-run indicator
    file is older than the given age.

    :param int seconds: Days/hours converted to seconds to consider a run old
    """
    try:
        # Remove old runs from archiving dirs
        for archive_dir in CONFIG.get('storage').get('archive_dirs').values():
            logger.info('Removing old runs in {}'.format(archive_dir))
            with filesystem.chdir(archive_dir):
                for run in [r for r in os.listdir(archive_dir) if re.match(filesystem.RUN_RE, r)]:
                    rta_file = os.path.join(run, finished_run_indicator)
                    if os.path.exists(rta_file):
                        if os.stat(rta_file).st_mtime < time.time() - seconds:
                            # BUGFIX: the run is deleted here; the old message
                            # claimed it was moved "to nosync directory".
                            logger.info('Removing old run {}'.format(os.path.basename(run)))
                            shutil.rmtree(run)
                        else:
                            logger.info('{} file exists but is not older than given time, skipping run {}'.format(
                                        finished_run_indicator, run))
    except IOError:
        # NOTE(review): this handler mentions transfer.tsv, but no transfer
        # file is read in this function - presumably left over from an older
        # version of the code; confirm before relying on the message.
        sbj = "Cannot archive old runs in processing server"
        msg = ("Could not find transfer.tsv file, so I cannot decide if I should "
               "archive any run or not.")
        cnt = CONFIG.get('contact', None)
        if not cnt:
            cnt = "{}@localhost".format(getpass.getuser())
        logger.error(msg)
        misc.send_mail(sbj, msg, cnt)
コード例 #3
0
ファイル: cleanup.py プロジェクト: vezzi/TACA
def cleanup_nas(seconds):
    """Move finished runs on the NASes to the nosync directory.

    A run is considered finished when its finished-run indicator file exists;
    when demultiplexing checks are enabled the run must also be demultiplexed
    according to statusdb, otherwise the operators are notified by mail.

    :param int seconds: Days/hours converted to seconds to consider a run old
    """
    couch_info = CONFIG.get('statusdb')
    mail_recipients = CONFIG.get('mail', {}).get('recipients')
    check_demux = CONFIG.get('storage', {}).get('check_demux', False)
    # Short host name, e.g. "nas1" from "nas1.example.com"
    host_name = os.getenv('HOSTNAME', os.uname()[1]).split('.', 1)[0]
    for data_dir in CONFIG.get('storage').get('data_dirs'):
        logger.info('Moving old runs in {}'.format(data_dir))
        with filesystem.chdir(data_dir):
            for run in [r for r in os.listdir(data_dir) if re.match(filesystem.RUN_RE, r)]:
                rta_file = os.path.join(run, finished_run_indicator)
                if os.path.exists(rta_file):
                    if check_demux:
                        if misc.run_is_demuxed(run, couch_info):
                            logger.info('Moving run {} to nosync directory'.format(os.path.basename(run)))
                            shutil.move(run, 'nosync')
                        elif os.stat(rta_file).st_mtime < time.time() - seconds:
                            # logger.warn is a deprecated alias of logger.warning
                            logger.warning('Run {} is older than given time, but it is not demultiplexed yet'
                                           .format(run))
                            sbt = "Run not demultiplexed - {}".format(run)
                            # BUGFIX: "older then" -> "older than" in the mail body
                            msg = ("Run '{}' in '{}' is older than given threshold, but seems like it is not "
                                  "yet demultiplexed".format(os.path.join(data_dir, run), host_name))
                            misc.send_mail(sbt, msg, mail_recipients)
                    else:
                        if os.stat(rta_file).st_mtime < time.time() - seconds:
                            logger.info('Moving run {} to nosync directory'.format(os.path.basename(run)))
                            shutil.move(run, 'nosync')
                        else:
                            logger.info('{} file exists but is not older than given time, skipping run {}'
                                        .format(finished_run_indicator, run))
コード例 #4
0
ファイル: backup.py プロジェクト: vezzi/TACA
 def _check_status(self, cmd, status, err_msg, mail_failed, files_to_remove=[]):
     """Check if a subprocess status is success and log error if failed"""
     if status != 0:
         self._clean_tmp_files(files_to_remove)
         if mail_failed:
             subjt = "Command call failed - {}".format(self.host_name)
             e_msg = "Called cmd: {}\n\nError msg: {}".format(" ".join(cmd), err_msg)
             misc.send_mail(subjt, e_msg, self.mail_recipients)
         logger.error("Command '{}' failed with the error '{}'".format(" ".join(cmd),err_msg))
         return False
     return True
コード例 #5
0
ファイル: storage.py プロジェクト: guillermo-carrasco/TACA
def cleanup_processing(days):
    """Cleanup runs in processing server.

    Moves runs recorded in transfer.tsv to the nosync directory, then deletes
    runs from the archiving directories that are old enough and already backed
    up in Swestore.

    :param int days: Number of days to consider a run to be old
    """
    transfer_file = os.path.join(CONFIG.get("preprocessing", {}).get("status_dir"), "transfer.tsv")
    if not days:
        # Fall back to the configured default, or 10 days
        days = CONFIG.get("cleanup", {}).get("processing-server", {}).get("days", 10)
    try:
        # Move finished runs to nosync
        for data_dir in CONFIG.get("storage").get("data_dirs"):
            logger.info("Moving old runs in {}".format(data_dir))
            with filesystem.chdir(data_dir):
                for run in [r for r in os.listdir(data_dir) if re.match(filesystem.RUN_RE, r)]:
                    if filesystem.is_in_file(transfer_file, run):
                        logger.info("Moving run {} to nosync directory".format(os.path.basename(run)))
                        shutil.move(run, "nosync")
                    else:
                        logger.info(
                            ("Run {} has not been transferred to the analysis " "server yet, not archiving".format(run))
                        )
        # Remove old runs from archiving dirs
        for archive_dir in CONFIG.get("storage").get("archive_dirs").values():
            logger.info("Removing old runs in {}".format(archive_dir))
            with filesystem.chdir(archive_dir):
                for run in [r for r in os.listdir(archive_dir) if re.match(filesystem.RUN_RE, r)]:
                    rta_file = os.path.join(run, "RTAComplete.txt")
                    if os.path.exists(rta_file):
                        # 1 day == 60*60*24 seconds --> 86400
                        if os.stat(rta_file).st_mtime < time.time() - (86400 * days) and filesystem.is_in_swestore(
                            "{}.tar.bz2".format(run)
                        ):
                            # BUGFIX: the run is deleted here; the old message
                            # claimed it was moved "to nosync directory".
                            logger.info("Removing old run {}".format(os.path.basename(run)))
                            shutil.rmtree(run)
                        else:
                            logger.info(
                                "RTAComplete.txt file exists but is not older than {} day(s), skipping run {}".format(
                                    str(days), run
                                )
                            )

    except IOError:
        sbj = "Cannot archive old runs in processing server"
        msg = "Could not find transfer.tsv file, so I cannot decide if I should " "archive any run or not."
        cnt = CONFIG.get("contact", None)
        if not cnt:
            cnt = "{}@localhost".format(getpass.getuser())
        logger.error(msg)
        misc.send_mail(sbj, msg, cnt)
コード例 #6
0
ファイル: bioinfo_tab.py プロジェクト: jfnavarro/TACA
def error_emailer(flag, info):
    """Notify the configured recipients about a TACA issue.

    Mails are only sent at 7:00, 12:00 and 16:00 to avoid spamming.

    :param str flag: Error type: 'no_samplesheet', 'failed_run' or
        'weird_samplesheet'
    :param str info: The offending entry, included in the mail body
    """
    recipients = CONFIG['mail']['recipients']

    body = 'TACA has encountered an issue that might be worth investigating\n'
    body += 'The offending entry is: '
    body += info
    body += '\n\nSincerely, TACA'

    if flag == 'no_samplesheet':
        subject = 'ERROR, Samplesheet error'
    elif flag == "failed_run":
        subject = 'WARNING, Reinitialization of partially failed FC'
    elif flag == 'weird_samplesheet':
        subject = 'ERROR, Incorrectly formatted samplesheet'
    else:
        # BUGFIX: an unknown flag used to leave 'subject' unbound, crashing
        # with NameError at send_mail below.
        subject = 'ERROR, Unknown error flag {}'.format(flag)

    # Send at most a few times a day, at fixed hours
    hourNow = datetime.datetime.now().hour
    if hourNow in (7, 12, 16):
        send_mail(subject, body, recipients)
コード例 #7
0
def error_emailer(flag, info):
    """Notify the configured recipients about a TACA issue.

    Mails are only sent at 7:00, 12:00 and 16:00 to avoid spamming.

    :param str flag: Error type: 'no_samplesheet', 'failed_run' or
        'weird_samplesheet'
    :param str info: The offending entry, included in the mail body
    """
    recipients = CONFIG['mail']['recipients']

    body = 'TACA has encountered an issue that might be worth investigating\n'
    body += 'The offending entry is: '
    body += info
    body += '\n\nSincerely, TACA'

    if flag == 'no_samplesheet':
        subject = 'ERROR, Samplesheet error'
    elif flag == "failed_run":
        subject = 'WARNING, Reinitialization of partially failed FC'
    elif flag == 'weird_samplesheet':
        subject = 'ERROR, Incorrectly formatted samplesheet'
    else:
        # BUGFIX: an unknown flag used to leave 'subject' unbound, crashing
        # with NameError at send_mail below.
        subject = 'ERROR, Unknown error flag {}'.format(flag)

    # Send at most a few times a day, at fixed hours
    hourNow = datetime.datetime.now().hour
    if hourNow in (7, 12, 16):
        send_mail(subject, body, recipients)
コード例 #8
0
def cleanup_nas(seconds):
    """Move finished runs on the NASes to the nosync directory.

    A run is considered finished when its finished-run indicator file exists;
    when demultiplexing checks are enabled the run must also be demultiplexed
    according to statusdb (MiSeq non-platform runs are moved regardless),
    otherwise the operators are notified by mail.

    :param int seconds: Days/hours converted to seconds to consider a run old
    """
    couch_info = CONFIG.get('statusdb')
    mail_recipients = CONFIG.get('mail', {}).get('recipients')
    check_demux = CONFIG.get('storage', {}).get('check_demux', False)
    # Short host name, e.g. "nas1" from "nas1.example.com"
    host_name = os.getenv('HOSTNAME', os.uname()[1]).split('.', 1)[0]
    for data_dir in CONFIG.get('storage').get('data_dirs'):
        if not os.path.exists(data_dir) or not os.path.isdir(data_dir):
            # logger.warn is a deprecated alias of logger.warning
            logger.warning('Data directory "{}" does not exist or not a directory'.format(data_dir))
            continue
        logger.info('Moving old runs in {}'.format(data_dir))
        with filesystem.chdir(data_dir):
            for run in [r for r in os.listdir(data_dir) if re.match(filesystem.RUN_RE, r)]:
                rta_file = os.path.join(run, finished_run_indicator)
                if os.path.exists(rta_file):
                    if check_demux:
                        if misc.run_is_demuxed(run, couch_info):
                            logger.info('Moving run {} to nosync directory'.format(os.path.basename(run)))
                            shutil.move(run, 'nosync')
                        elif 'miseq' in data_dir:
                            # NOTE(review): this branch shadows the age check
                            # below for all MiSeq runs - old, undemultiplexed
                            # NGI MiSeq runs are never reported; confirm intended.
                            miseq_run = MiSeq_Run(run, CONFIG)
                            if miseq_run.get_run_type() == 'NON-NGI-RUN':
                                logger.info('Run {} is a non-platform run, so moving it to nosync directory'.format(os.path.basename(run)))
                                shutil.move(run, 'nosync')
                        elif os.stat(rta_file).st_mtime < time.time() - seconds:
                            logger.warning('Run {} is older than given time, but it is not demultiplexed yet'
                                           .format(run))
                            sbt = 'Run not demultiplexed - {}'.format(run)
                            # BUGFIX: "older then" -> "older than" in the mail body
                            msg = ('Run "{}" in "{}" is older than given threshold, but seems like it is not '
                            'yet demultiplexed'.format(os.path.join(data_dir, run), host_name))
                            misc.send_mail(sbt, msg, mail_recipients)
                    else:
                        if os.stat(rta_file).st_mtime < time.time() - seconds:
                            logger.info('Moving run {} to nosync directory'.format(os.path.basename(run)))
                            shutil.move(run, 'nosync')
                        else:
                            logger.info('{} file exists but is not older than given time, skipping run {}'
                                        .format(finished_run_indicator, run))
コード例 #9
0
 def avail_disk_space(self, path, run):
     """Check the space on the file system based on the parent directory of the run.

     Estimates the space needed to encrypt *run* (twice its typical size, plus
     one extra run-size per ongoing run in the data directories) and compares
     it against the free space reported by ``df``.

     :param str path: Path whose file system free space is checked
     :param str run: Run (directory) name, used to look up the run type
     :raises SystemExit: if free space cannot be determined or is insufficient
     """
     # If the run type cannot be determined, use the max size as precaution.
     # Size units are GB.
     illumina_run_sizes = {
         'hiseq': 500,
         'hiseqx': 900,
         'novaseq': 1800,
         'miseq': 20,
         'nextseq': 250
     }
     required_size = illumina_run_sizes.get(self._get_run_type(run),
                                            900) * 2
     # Check for any ongoing runs and add up the required size accordingly
     for ddir in self.data_dirs.values():
         if not os.path.isdir(ddir):
             continue
         for item in os.listdir(ddir):
             if not re.match(filesystem.RUN_RE, item):
                 continue
             if not os.path.exists(
                     os.path.join(ddir, item, 'RTAComplete.txt')):
                 # BUGFIX: size up the *ongoing* run ('item'), not the run
                 # being encrypted ('run') as the old code did.
                 required_size += illumina_run_sizes.get(
                     self._get_run_type(item), 900)
     # Get available free space from the file system
     try:
         # universal_newlines=True makes communicate() return str on Python 3;
         # without it df_out is bytes and .split('\n') raises TypeError.
         df_proc = sp.Popen(['df', path], stdout=sp.PIPE, stderr=sp.PIPE,
                            universal_newlines=True)
         df_out, df_err = df_proc.communicate()
         # 4th column of the last df line is available KB; convert to GB
         available_size = int(df_out.strip().split('\n')
                              [-1].strip().split()[3]) / 1024 / 1024
     except Exception as e:
         logger.error(
             'Evaluation of disk space failed with error {}'.format(e))
         raise SystemExit
     if available_size < required_size:
         e_msg = 'Required space for encryption is {}GB, but only {}GB available'.format(
             required_size, available_size)
         subjt = 'Low space for encryption - {}'.format(self.host_name)
         logger.error(e_msg)
         misc.send_mail(subjt, e_msg, self.mail_recipients)
         raise SystemExit
コード例 #10
0
ファイル: Runs.py プロジェクト: kate-v-stepanova/TACA
    def post_qc(self, qc_file, status, log_file, rcp):
        """ Checks whether a run has passed the final QC.

            Records the verdict (PASSED/FAILED) in ``qc_file`` unless the run
            is already listed there, then always mails ``rcp`` a summary.

            :param str qc_file: Path to file with information about transferred runs
            :param bool status: True if the run passed QC, False otherwise
            :param str log_file: Path to the log file (referenced in the mail)
            :param str rcp: Mail recipient(s)
        """
        already_seen = False
        runname = self.id
        # Short flowcell id: <date>_<flowcell> - TODO confirm format
        shortrun = runname.split('_')[0] + '_' + runname.split('_')[-1]
        QC_result = ""
        # NOTE(review): 'ab+' (binary append) only accepts str writes on
        # Python 2; presumably a Python 2 codebase.
        with open(qc_file, 'ab+') as f:
            f.seek(0)
            for row in f:
                # Rows have two columns: run and transfer date
                if row.split('\t')[0] == runname:
                    already_seen = True
            QC_result = "PASSED" if status else "FAILED"

            # Only record the verdict once per run
            if not already_seen:
                f.write("{}\t{}\n".format(runname, QC_result))

            # The notification mail is sent regardless of the QC outcome
            sj = "{} Demultiplexed".format(runname)
            # BUGFIX: "Autmatic" -> "Automatic" in the mail body
            cnt = """The run {run} has been demultiplexed and automatic QC took place.
                    The Run will be transferred to Nestor for further analysis.

                    Automatic QC defines the runs as: {QC}

                    The run is available at : https://genomics-status.scilifelab.se/flowcells/{shortfc}

                    To read the logs, run the following command on {server}
                    grep -A30 "Checking run {run}" {log}

                    """.format(run=runname, QC=QC_result, shortfc=shortrun, log=log_file, server=os.uname()[1])
            misc.send_mail(sj, cnt, rcp)
コード例 #11
0
def _exec_fn(obj, fn):
    """Execute *fn* for *obj* and log the outcome.

    On an unexpected exception the operator configured for *obj* is notified
    by mail; a failure to send that mail is itself logged.
    """
    try:
        outcome = fn()
        template = ("{} processed successfully" if outcome
                    else "{} processed with some errors, check log")
        logger.info(template.format(str(obj)))
    except Exception as e:
        logger.exception(e)
        try:
            # Best-effort notification; the mail call itself may fail
            send_mail(
                subject="[ERROR] processing failed: {}".format(str(obj)),
                content=
                "Project: {}\nSample: {}\nCommand: {}\n\nAdditional information:{}\n"
                .format(obj.projectid, obj.sampleid, str(fn), str(e)),
                receiver=obj.config.get('operator'))
        except Exception as mail_err:
            logger.error(
                "processing {} failed - reason: {}, but operator {} could not be notified - reason: {}"
                .format(str(obj), e, obj.config.get('operator'), mail_err))
        else:
            logger.error(
                "processing {} failed - reason: {}, operator {} has been notified"
                .format(str(obj), str(e), obj.config.get('operator')))
コード例 #12
0
def process_minion_run(minion_run,
                       sequencing_ongoing=False,
                       nanoseq_ongoing=False):
    """Process MinION QC runs.

    Will not start nanoseq if a sequencing run is ongoing, to limit memory usage.
    Will also maximum start one nanoseq run at once, for the same reason.

    :param minion_run: run object providing run_dir, run_id, nanoseq/anglerfish
        paths and helper methods (project type - see caller)
    :param bool sequencing_ongoing: True if a sequencing run is in progress
    :param bool nanoseq_ongoing: True if a nanoseq analysis is already running
    :returns: the (possibly updated) nanoseq_ongoing flag
    """
    logger.info('Processing QC run: {}'.format(minion_run.run_dir))
    email_recipients = CONFIG.get('mail').get('recipients')

    # Case 1: sequencing is done (summary file present) but nanoseq has not
    # started yet -> try to start the nanoseq analysis.
    if len(minion_run.summary_file) and os.path.isfile(
            minion_run.summary_file[0]) and not os.path.isdir(
                minion_run.nanoseq_dir):
        logger.info(
            'Sequencing done for run {}. Attempting to start analysis.'.format(
                minion_run.run_dir))
        if not minion_run.nanoseq_sample_sheet:
            # Fall back to generating the sample sheet from LIMS
            minion_run.parse_lims_sample_sheet()

        if os.path.isfile(minion_run.nanoseq_sample_sheet):
            # Respect the one-nanoseq-at-a-time / no-concurrent-sequencing limits
            if nanoseq_ongoing:
                logger.warn(
                    'Nanoseq already started, will not attempt to start for {}'
                    .format(minion_run.run_dir))
            elif sequencing_ongoing:
                logger.warn(
                    'Sequencing ongoing, will not attempt to start Nanoseq for {}'
                    .format(minion_run.run_dir))
            else:
                minion_run.start_nanoseq()
                nanoseq_ongoing = True

        else:
            # No sample sheet could be located; notify the operator by mail
            logger.warn(
                'Samplesheet not found for run {}. Operator notified. Skipping.'
                .format(minion_run.run_dir))
            email_subject = ('Samplesheet missing for run {}'.format(
                os.path.basename(minion_run.run_dir)))
            email_message = 'There was an issue locating the samplesheet for run {}.'.format(
                minion_run.run_dir)
            send_mail(email_subject, email_message, email_recipients)

    # Case 2: nanoseq has started (dir exists) but no exit-status file yet
    # -> analysis still running, nothing to do.
    elif os.path.isdir(minion_run.nanoseq_dir) and not os.path.isfile(
            minion_run.nanoseq_exit_status_file):
        logger.info(
            'Nanoseq has started for run {} but is not yet done. Skipping.'.
            format(minion_run.run_dir))

    # Case 3: nanoseq has finished (exit-status file exists) -> check its
    # status and drive the Anglerfish / transfer / archive steps.
    elif os.path.isdir(minion_run.nanoseq_dir) and os.path.isfile(
            minion_run.nanoseq_exit_status_file):
        nanoseq_successful = minion_run.check_exit_status(
            minion_run.nanoseq_exit_status_file)
        if nanoseq_successful:
            # Case 3a: nanoseq OK, Anglerfish not started -> try to start it
            if not os.path.isdir(minion_run.anglerfish_dir):
                logger.info(
                    'Nanoseq done for run {}. Attempting to start Anglerfish.'.
                    format(minion_run.run_id))
                if not minion_run.anglerfish_sample_sheet:
                    minion_run.anglerfish_sample_sheet = os.path.join(
                        minion_run.run_dir, 'anglerfish_sample_sheet.csv'
                    )  # For cronjob, AF sample sheet was generated at previous run
                if os.path.isfile(minion_run.anglerfish_sample_sheet):
                    minion_run.start_anglerfish()
                else:
                    logger.warn(
                        'Anglerfish sample sheet missing for run {}. '
                        'Please provide one using --anglerfish_sample_sheet '
                        'if running TACA manually.'.format(minion_run.run_id))

            # Case 3b: Anglerfish running but not finished -> wait
            elif not os.path.isfile(minion_run.anglerfish_exit_status_file):
                logger.info(
                    'Anglerfish has started for run {} but is not yet done. Skipping.'
                    .format(minion_run.run_id))

            # Case 3c: Anglerfish finished -> report, transfer and archive
            elif os.path.isfile(minion_run.anglerfish_exit_status_file):
                anglerfish_successful = minion_run.check_exit_status(
                    minion_run.anglerfish_exit_status_file)
                if anglerfish_successful:
                    # Copy results to LIMS and notify the operator either way
                    if minion_run.copy_results_for_lims():
                        logger.info(
                            'Anglerfish finished OK for run {}. Notifying operator.'
                            .format(minion_run.run_id))
                        email_subject = (
                            'Anglerfish successfully processed run {}'.format(
                                minion_run.run_id))
                        email_message = (
                            'Anglerfish has successfully finished for run {}. Please '
                            'finish the QC step in lims.').format(
                                minion_run.run_id)
                        send_mail(email_subject, email_message,
                                  email_recipients)
                    else:
                        email_subject = (
                            'Run processed with errors: {}'.format(
                                minion_run.run_id))
                        email_message = (
                            'Anglerfish has successfully finished for run {} but an error '
                            'occurred while transferring the results to lims.'
                        ).format(minion_run.run_id)
                        send_mail(email_subject, email_message,
                                  email_recipients)

                    # Transfer to the analysis cluster, then archive
                    if minion_run.is_not_transferred():
                        if minion_run.transfer_run():
                            if minion_run.update_transfer_log():
                                logger.info(
                                    'Run {} has been synced to the analysis cluster.'
                                    .format(minion_run.run_id))
                            else:
                                email_subject = (
                                    'Run processed with errors: {}'.format(
                                        minion_run.run_id))
                                email_message = (
                                    'Run {} has been transferred, but an error occurred while updating '
                                    'the transfer log').format(
                                        minion_run.run_id)
                                send_mail(email_subject, email_message,
                                          email_recipients)

                            # NOTE(review): archiving is attempted even when
                            # updating the transfer log failed - confirm intended
                            if minion_run.archive_run():
                                logger.info(
                                    'Run {} is finished and has been archived. Notifying operator.'
                                    .format(minion_run.run_id))
                                email_subject = (
                                    'Run successfully processed: {}'.format(
                                        minion_run.run_id))
                                email_message = (
                                    'Run {} has been analysed, transferred and archived '
                                    'successfully.').format(minion_run.run_id)
                                send_mail(email_subject, email_message,
                                          email_recipients)
                            else:
                                email_subject = (
                                    'Run processed with errors: {}'.format(
                                        minion_run.run_id))
                                email_message = (
                                    'Run {} has been analysed, but an error occurred during '
                                    'archiving').format(minion_run.run_id)
                                send_mail(email_subject, email_message,
                                          email_recipients)

                        else:
                            logger.warn(
                                'An error occurred during transfer of run {} '
                                'to Irma. Notifying operator.'.format(
                                    minion_run.run_id))
                            email_subject = (
                                'Run processed with errors: {}'.format(
                                    minion_run.run_id))
                            email_message = (
                                'Run {} has been analysed, but an error occurred during '
                                'transfer.').format(minion_run.run_id)
                            send_mail(email_subject, email_message,
                                      email_recipients)

                    else:
                        logger.warn(
                            'The following run has already been transferred, '
                            'skipping: {}'.format(minion_run.run_id))

                else:
                    # Anglerfish failed: notify the operator
                    logger.warn(
                        'Anglerfish exited with a non-zero exit status for run {}. '
                        'Notifying operator.'.format(minion_run.run_id))
                    email_subject = ('Run processed with errors: {}'.format(
                        minion_run.run_id))
                    email_message = (
                        'Anglerfish exited with errors for run {}. Please '
                        'check the log files and restart.').format(
                            minion_run.run_id)
                    send_mail(email_subject, email_message, email_recipients)

        else:
            # Nanoseq failed: notify the operator
            logger.warn(
                'Nanoseq exited with a non-zero exit status for run {}. '
                'Notifying operator.'.format(minion_run.run_id))
            email_subject = ('Analysis failed for run {}'.format(
                minion_run.run_id))
            email_message = 'The nanoseq analysis failed for run {}.'.format(
                minion_run.run_id)
            send_mail(email_subject, email_message, email_recipients)

    # Case 4: sequencing not finished yet -> nothing to do
    else:
        logger.info('Run {} not finished sequencing yet. Skipping.'.format(
            minion_run.run_id))

    return nanoseq_ongoing
コード例 #13
0
ファイル: Runs.py プロジェクト: Hammarn/TACA
    def transfer_run(self, t_file, analysis, mail_recipients=None):
        """ Transfer a run to the analysis server with rsync. Will add group R/W
            permissions to the run directory in the destination server so that the
            run can be processed by any user/account in that group (i.e a
            functional account...).

            A 'transferring' marker file is kept in the run directory for the
            duration of the rsync and removed afterwards (also on failure).

            :param str t_file: File where to put the transfer information
            :param bool analysis: Trigger analysis on remote server
            :param mail_recipients: Optional recipient(s) notified on rsync
                success or failure; no mail is sent when None
            :raises IOError: if the marker file cannot be created
            :raises subprocess.CalledProcessError: if the rsync command fails
        """
        # TODO: check the run type and build the correct rsync command
        # The option -a implies -o and -g which is not the desired behaviour
        command_line = ['rsync', '-Lav', '--no-o', '--no-g']
        # Add R/W permissions to the group
        command_line.append('--chmod=g+rw')
        # This horrible thing here avoids data dup when we use multiple indexes in a lane/FC
        command_line.append("--exclude=Demultiplexing_*/*_*")
        command_line.append("--include=*/")
        # Whitelist the configured patterns, exclude everything else
        for to_include in self.CONFIG['analysis_server']['sync']['include']:
            command_line.append("--include={}".format(to_include))
        command_line.extend(["--exclude=*", "--prune-empty-dirs"])
        r_user = self.CONFIG['analysis_server']['user']
        r_host = self.CONFIG['analysis_server']['host']
        r_dir = self.CONFIG['analysis_server']['sync']['data_archive']
        remote = "{}@{}:{}".format(r_user, r_host, r_dir)
        command_line.extend([self.run_dir, remote])

        # Create temp file indicating that the run is being transferred
        try:
            open(os.path.join(self.run_dir, 'transferring'), 'w').close()
        except IOError as e:
            logger.error("Cannot create a file in {}. "
                         "Check the run name, and the permissions.".format(
                             self.id))
            raise e
        started = ("Started transfer of run {} on {}".format(
            self.id, datetime.now()))
        logger.info(started)
        # In this particular case we want to capture the exception because we want
        # to delete the transfer file
        try:
            msge_text = "I am about to transfer with this command \n{}".format(
                command_line)
            logger.info(msge_text)
            misc.call_external_command(command_line,
                                       with_log_files=True,
                                       prefix="",
                                       log_dir=self.run_dir)
        except subprocess.CalledProcessError as exception:
            # Remove the marker so a retry is possible, then notify and re-raise
            os.remove(os.path.join(self.run_dir, 'transferring'))
            # Send an email notifying that the transfer failed
            runname = self.id
            sbt = ("Rsync of run {} failed".format(runname))
            msg = """ Rsync of data for run {run} has failed!
                Raised the following exception:     {e}
            """.format(run=runname, e=exception)
            if mail_recipients:
                send_mail(sbt, msg, mail_recipients)

            raise exception

        # Record the successful transfer (run name + timestamp, tab-separated)
        logger.info('Adding run {} to {}'.format(self.id, t_file))
        with open(t_file, 'a') as tranfer_file:
            tsv_writer = csv.writer(tranfer_file, delimiter='\t')
            tsv_writer.writerow([self.id, str(datetime.now())])
        os.remove(os.path.join(self.run_dir, 'transferring'))

        # Send an email notifying that the transfer was successful
        runname = self.id
        sbt = ("Rsync of data for run {} to Irma has finished".format(runname))
        msg = """ Rsync of data for run {run} to Irma has finished!
                          
        The run is available at : https://genomics-status.scilifelab.se/flowcells/{run}
        """.format(run=runname)
        if mail_recipients:
            send_mail(sbt, msg, mail_recipients)

        if analysis:
            # This needs to pass the runtype (i.e., Xten or HiSeq) and start the correct pipeline
            self.trigger_analysis()
コード例 #14
0
ファイル: backup.py プロジェクト: zhanglingfei/TACA
class backup_utils(object):
    """A class object with main utility methods related to backing up runs."""

    def __init__(self, run=None):
        # 'run' optionally pins the instance to a single run (path or name);
        # when None, methods operate on everything found in the archive dirs.
        self.run = run
        self.fetch_config_info()
        # Short host name (first dot-separated label), used in mail subjects.
        self.host_name = os.getenv('HOSTNAME', os.uname()[1]).split('.', 1)[0]

    def fetch_config_info(self):
        """Fetch required info from the config file; log and exit if any necessary key is missing."""
        try:
            self.data_dirs = CONFIG['backup']['data_dirs']
            self.archive_dirs = CONFIG['backup']['archive_dirs']
            self.keys_path = CONFIG['backup']['keys_path']
            self.gpg_receiver = CONFIG['backup']['gpg_receiver']
            self.mail_recipients = CONFIG['mail']['recipients']
            self.check_demux = CONFIG.get('backup',
                                          {}).get('check_demux', False)
            self.couch_info = CONFIG.get('statusdb')
        except KeyError as e:
            logger.error(
                "Config file is missing the key {}, make sure it have all required information"
                .format(str(e)))
            raise SystemExit

    def collect_runs(self, ext=None, filter_by_ext=False):
        """Collect runs from the archive directories into ``self.runs``.

        :param str ext: optional file extension; entries ending with it are
                        accepted and the extension is stripped from the name
        :param bool filter_by_ext: when True, skip entries lacking ``ext``
        """
        self.runs = []
        if self.run:
            run = run_vars(self.run)
            if not re.match(filesystem.RUN_RE, run.name):
                logger.error("Given run {} did not match a FC pattern".format(
                    self.run))
                raise SystemExit
            self.runs.append(run)
        else:
            for adir in self.archive_dirs.values():
                if not os.path.isdir(adir):
                    logger.warn(
                        "Path {} does not exist or it is not a directory".
                        format(adir))
                    continue
                for item in os.listdir(adir):
                    if filter_by_ext and not item.endswith(ext):
                        continue
                    # Fix: guard on 'ext' — the original called
                    # item.endswith(None) here whenever ext was left at its
                    # default, raising TypeError on the first directory entry.
                    elif ext and item.endswith(ext):
                        item = item.replace(ext, '')
                    elif not os.path.isdir(os.path.join(adir, item)):
                        continue
                    # NOTE(review): self.runs holds run_vars objects, so this
                    # str-vs-object membership test may never dedup unless
                    # run_vars defines __eq__ against strings — confirm.
                    if re.match(filesystem.RUN_RE,
                                item) and item not in self.runs:
                        self.runs.append(run_vars(os.path.join(adir, item)))

    def avail_disk_space(self, path, run):
        """Check that the file system holding ``path`` has room to encrypt ``run``.

        Exits (after mailing the recipients) when the space is insufficient.
        """
        # not able to fetch runtype use the max size as precaution, size units in GB
        illumina_run_sizes = {
            'hiseq': 500,
            'hiseqx': 900,
            'novaseq': 1800,
            'miseq': 20
        }
        required_size = illumina_run_sizes.get(self._get_run_type(run),
                                               900) * 2
        # check for any ongoing runs and add up the required size accordingly
        for ddir in self.data_dirs.values():
            if not os.path.isdir(ddir):
                continue
            for item in os.listdir(ddir):
                if not re.match(filesystem.RUN_RE, item):
                    continue
                # A run without RTAComplete.txt is still sequencing and will
                # need its own space on top of the run being encrypted.
                if not os.path.exists(
                        os.path.join(ddir, item, "RTAComplete.txt")):
                    required_size += illumina_run_sizes.get(
                        self._get_run_type(run), 900)
        # get available free space from the file system
        try:
            df_proc = sp.Popen(['df', path], stdout=sp.PIPE, stderr=sp.PIPE)
            df_out, df_err = df_proc.communicate()
            # NOTE(review): field [2] of the last 'df' line is 'Available'
            # only when df wraps a long device name onto its own line; on an
            # unwrapped POSIX line field [2] is 'Used'. Confirm against the
            # df output of the target hosts.
            available_size = int(df_out.strip().split('\n')
                                 [-1].strip().split()[2]) / 1024 / 1024
        # Fix: 'except Exception, e' is Python-2-only syntax; 'as' works on 2.6+/3.
        except Exception as e:
            logger.error(
                "Evaluation of disk space failed with error {}".format(e))
            raise SystemExit
        if available_size < required_size:
            e_msg = "Required space for encryption is {}GB, but only {}GB available".format(
                required_size, available_size)
            subjt = "Low space for encryption - {}".format(self.host_name)
            logger.error(e_msg)
            misc.send_mail(subjt, e_msg, self.mail_recipients)
            raise SystemExit
コード例 #15
0
def _transfer_and_archive(run_dir, run_id, transfer_log, email_recipients):
    """Sync a finished run to the analysis cluster, archive it and mail the
    operator; on a failed transfer, mail the operator without archiving.

    Extracted from process_run, where this logic was duplicated verbatim for
    the QC and non-QC branches (log wording unified to use run_dir).
    """
    if is_not_transferred(run_id, transfer_log):
        if transfer_run(run_dir):
            update_transfer_log(run_id, transfer_log)
            logger.info('Run {} has been synced to the analysis cluster.'.format(run_id))
            archive_run(run_dir)
            logger.info('Run {} is finished and has been archived. Notifying operator.'.format(run_id))
            email_subject = ('Run successfully processed: {}'.format(run_id))
            email_message = ('Run {} has been analysed, transferred and archived '
                             'successfully.').format(run_id)
            send_mail(email_subject, email_message, email_recipients)
        else:
            logger.warn('An error occurred during transfer of run {} '
                        'to Irma. Notifying operator.'.format(run_dir))
            email_subject = ('Run processed with errors: {}'.format(run_id))
            email_message = ('Run {} has been analysed, but an error occurred during '
                             'transfer.').format(run_id)
            send_mail(email_subject, email_message, email_recipients)
    else:
        logger.warn('The following run has already been transferred, '
                    'skipping: {}'.format(run_dir))


def process_run(run_dir, nanoseq_sample_sheet, anglerfish_sample_sheet):
    """Process a nanopore run directory through its lifecycle.

    Depending on the run's current state this starts nanoseq, waits for it,
    starts anglerfish (QC runs only), and finally transfers and archives the
    run, mailing the configured recipients at each success/failure milestone.

    :param str run_dir: path to the nanopore run directory
    :param str nanoseq_sample_sheet: pre-made nanoseq sample sheet (may be falsy)
    :param str anglerfish_sample_sheet: pre-made anglerfish sample sheet (may be falsy)
    """
    # A run supplied with a nanoseq sheet but no anglerfish sheet is a
    # production (non-QC) run; everything else is treated as a QC run.
    qc_run = True
    if nanoseq_sample_sheet and not anglerfish_sample_sheet:
        qc_run = False

    logger.info('Processing run: {} as a {}'.format(run_dir, 'QC run' if qc_run else 'non-QC run'))
    # Fix: the original indexed glob(...)[0] unconditionally, raising
    # IndexError for runs still sequencing (no final_summary*.txt yet) and
    # never reaching the intended "not finished sequencing" branch below.
    summary_files = glob.glob(run_dir + '/final_summary*.txt')
    summary_file = summary_files[0] if summary_files else None
    nanoseq_dir = os.path.join(run_dir, 'nanoseq_output')
    anglerfish_dir = os.path.join(run_dir, 'anglerfish_output')
    # NOTE(review): this overwrites the anglerfish_sample_sheet argument, which
    # is only consulted for the qc_run decision above — confirm intended.
    anglerfish_sample_sheet = os.path.join(run_dir, 'anglerfish_sample_sheet.csv')
    nanoseq_exit_status_file = os.path.join(run_dir, '.exitcode_for_nanoseq')
    anglerfish_exit_status_file = os.path.join(run_dir, '.exitcode_for_anglerfish')
    email_recipients = CONFIG.get('mail').get('recipients')

    if summary_file and os.path.isfile(summary_file) and not os.path.isdir(nanoseq_dir):
        logger.info('Sequencing done for run {}. Attempting to start analysis.'.format(run_dir))
        if not nanoseq_sample_sheet:
            nanoseq_sample_sheet = parse_lims_sample_sheet(run_dir)

        if os.path.isfile(nanoseq_sample_sheet):
            start_nanoseq(run_dir, nanoseq_sample_sheet)

        else:
            logger.warn('Samplesheet not found for run {}. Operator notified. Skipping.'.format(run_dir))
            email_subject = ('Samplesheet missing for run {}'.format(os.path.basename(run_dir)))
            email_message = 'There was an issue locating the samplesheet for run {}.'.format(run_dir)
            send_mail(email_subject, email_message, email_recipients)

    elif os.path.isdir(nanoseq_dir) and not os.path.isfile(nanoseq_exit_status_file):
        logger.info('Nanoseq has started for run {} but is not yet done. Skipping.'.format(run_dir))

    elif os.path.isdir(nanoseq_dir) and os.path.isfile(nanoseq_exit_status_file):
        nanoseq_successful = check_exit_status(nanoseq_exit_status_file)
        if nanoseq_successful:
            run_id = os.path.basename(run_dir)
            transfer_log = CONFIG.get('nanopore_analysis').get('transfer').get('transfer_file')

            if qc_run and not os.path.isdir(anglerfish_dir):
                logger.info('Nanoseq done for run {}. Attempting to start Anglerfish.'.format(run_id))
                start_anglerfish(run_dir, anglerfish_sample_sheet, anglerfish_dir)

            elif qc_run and not os.path.isfile(anglerfish_exit_status_file):
                logger.info('Anglerfish has started for run {} but is not yet done. Skipping.'.format(run_id))

            elif qc_run and os.path.isfile(anglerfish_exit_status_file):
                anglerfish_successful = check_exit_status(anglerfish_exit_status_file)
                if anglerfish_successful:
                    copy_results_for_lims(run_dir, anglerfish_dir)
                    logger.info('Anglerfish finished OK for run {}. Notifying operator.'.format(run_id))
                    email_subject = ('Anglerfish successfully processed run {}'.format(os.path.basename(run_id)))
                    email_message = ('Anglerfish has successfully finished for run {}. Please '
                                     'finish the QC step in lims.').format(run_id)
                    send_mail(email_subject, email_message, email_recipients)
                    _transfer_and_archive(run_dir, run_id, transfer_log, email_recipients)

                else:
                    logger.warn('Anglerfish exited with a non-zero exit status for run {}. '
                                'Notifying operator.'.format(run_dir))
                    email_subject = ('Run processed with errors: {}'.format(os.path.basename(run_id)))
                    email_message = ('Anglerfish exited with errors for run {}. Please '
                                     'check the log files and restart.').format(run_id)
                    send_mail(email_subject, email_message, email_recipients)

            elif not qc_run:
                _transfer_and_archive(run_dir, run_id, transfer_log, email_recipients)

        else:
            logger.warn('Nanoseq exited with a non-zero exit status for run {}. '
                        'Notifying operator.'.format(run_dir))
            email_subject = ('Analysis failed for run {}'.format(os.path.basename(run_dir)))
            email_message = 'The nanoseq analysis failed for run {}.'.format(run_dir)
            send_mail(email_subject, email_message, email_recipients)

    else:
        logger.info('Run {} not finished sequencing yet. Skipping.'.format(run_dir))

    return