Example no. 1
0
def main(argv=None):
    """Create ``Sequencing.completed`` marker files for sequencing run folders.

    Parses the command-line options, configures logging and loops over all
    run folders found under ``--basedir``, calling
    ``create_sequencing_completed()`` on each one.

    :param argv: unused; kept for backward compatibility with callers that
        pass an argument list.
    """
    # get the options
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--basedir",
        dest="basedir",
        action="store",
        help="sequencing server base directories e.g. '/solexa0[1-8]/data/Runs'",
        required=True)
    parser.add_argument(
        "--runfolder",
        dest="run_folder",
        action="store",
        help="run folder e.g. '130114_HWI-ST230_1016_D18MAACXX'")
    parser.add_argument(
        "--dry-run",
        dest="dry_run",
        action="store_true",
        default=False,
        help="use this option to not do any shell command execution, only report actions")
    parser.add_argument(
        "--logfile",
        dest="logfile",
        action="store",
        default=False,
        help="File to print logging information")

    options = parser.parse_args()

    # logging configuration: log to file when --logfile given, default otherwise
    if options.logfile:
        log = logger.get_custom_logger(options.logfile)
    else:
        log = logger.get_custom_logger()

    try:
        runs = auto_data.RunFolderList(options.basedir, "", None,
                                       options.run_folder)
        # loop over all run folders in options.basedir
        for run_folder in runs.run_folders:
            try:
                # create Sequencing.completed
                log.info(auto_data.RUN_HEADER % {'run_folder': run_folder})
                create_sequencing_completed(run_folder, options.dry_run)
            # catch Exception (not a bare except) so KeyboardInterrupt and
            # SystemExit still propagate; one failing run folder must not
            # stop the processing of the remaining ones
            except Exception:
                log.exception("Unexpected error")
                continue
    except Exception:
        # boundary handler: log with traceback, then re-raise
        log.exception("Unexpected error")
        raise
Example no. 2
0
def main():
    """Clean up finished sequencing run folders under --basedir.

    For each run folder that is finished (SequencingComplete.txt present,
    SyncComplete.txt present and fastq files attached in the LIMS — or
    SequencingFail.txt present) and older than the configured thresholds,
    delete images, intensities and thumbnails in turn; once all three
    deletions are complete and the folder is old enough, move it into an
    'OldRuns' directory next to it. Folders containing the dont.delete
    marker are never touched.
    """
    # get the options
    parser = argparse.ArgumentParser()
    parser.add_argument("--basedir", dest="basedir", action="store", help="old run folder directories e.g. '/processing/OldRuns/'", required=True)
    parser.add_argument("--thumbnails", dest="thumbnails", action="store", help="number of days to keep thumbnails - default set to 90", default=90, type=int)
    parser.add_argument("--intensities", dest="intensities", action="store", help="number of days to keep intensities - default set to 21", default=21, type=int)
    parser.add_argument("--images", dest="images", action="store", help="number of days to keep images - default set to 14", default=14, type=int)
    parser.add_argument("--limsdev", dest="use_limsdev", action="store_true", default=False, help="Use the development LIMS url")
    parser.add_argument("--dry-run", dest="dry_run", action="store_true", default=False, help="use this option to not do any shell command execution, only report actions")
    parser.add_argument("--logfile", dest="logfile", action="store", default=False, help="File to print logging information")

    options = parser.parse_args()

    # logging configuration: log to file when --logfile given, default otherwise
    if options.logfile:
        log = logger.get_custom_logger(options.logfile)
    else:
        log = logger.get_custom_logger()

    try:
        # connect to lims
        glslims = auto_glslims.GlsLims(options.use_limsdev)

        # setting-up time
        # age thresholds derived from the day counts via convert_day();
        # presumably seconds, since they are compared against mtime deltas
        # below — TODO confirm convert_day() units
        present = time.time()
        delete_thumbnails_older_than = convert_day(options.thumbnails)
        delete_images_older_than = convert_day(options.images)
        delete_intensities_older_than = convert_day(options.intensities)
        # a folder is moved only after all three retention periods have elapsed
        move_folder_older_than = convert_day(options.thumbnails+options.images+options.intensities)

        # loop over all runs in options.basedir
        runs = auto_data.RunFolderList(options.basedir, None, None)
        all_runs = runs.all_runs()
        for run in all_runs:
            try:
                log.info(run.get_header())
                # check dont.delete is not present - do not clean if present
                if not os.path.exists(run.dont_delete):

                    # check SequencingComplete.txt and SyncComplete.txt and Primary Fastq Files Found in lims OR SequencingFail.txt present
                    if ( os.path.exists(run.sequencing_completed) and os.path.exists(run.sync_completed) and glslims.are_fastq_files_attached(run.run_folder_name) ) or os.path.exists(run.sequencing_failed):
                        # age reference differs per outcome: mtime of the Data
                        # directory for completed runs, mtime of the failure
                        # marker for failed runs
                        if os.path.exists(run.sequencing_completed):
                            runfolder_age = present - os.path.getmtime(os.path.join(run.run_folder, 'Data'))
                            log.info('[IMG:%s|INT:%s|PIC:%s] run completed %s ago' % (options.images, options.intensities, options.thumbnails, datetime.timedelta(seconds=runfolder_age)))
                        else:
                            runfolder_age = present - os.path.getmtime(run.sequencing_failed)
                            log.info('[IMG:%s|INT:%s|PIC:%s] run failed %s ago' % (options.images, options.intensities, options.thumbnails, datetime.timedelta(seconds=runfolder_age)))

                        # check deleting file has been done already
                        if is_completed(run.run_folder, 'delete_images') and is_completed(run.run_folder, 'delete_intensities') and is_completed(run.run_folder, 'delete_thumbnails'):
                            log.info('All images/intensities/thumbnails deleted')
                            # moving run folders to OldRuns after thumbnails+images+intensities days
                            if runfolder_age > move_folder_older_than:
                                oldruns_path = os.path.join(os.path.dirname(run.run_folder), 'OldRuns')
                                move_runfolder_cmd = ['mv', run.run_folder, oldruns_path]
                                utils.create_directory(oldruns_path)
                                log.info('moving run folder...')
                                utils.run_bg_process(move_runfolder_cmd, options.dry_run)
                        else:
                            # NOTE(review): the find patterns below (*.tif,
                            # *.cif, *.jpg, ...) are unquoted inside the
                            # command strings; this relies on the shell not
                            # glob-expanding them in the working directory —
                            # verify how setup_clean()/clean() execute these
                            # deleting images
                            if is_completed(run.run_folder, 'delete_images'):
                                log.info('All images deleted')
                            else:
                                if runfolder_age > delete_images_older_than:
                                    delete_images_cmd = "find %s -name *.tif -delete" % run.run_folder
                                    setup_clean(run.run_folder, 'delete_images', delete_images_cmd)
                                    clean(run.run_folder, 'delete_images', options.dry_run)
                            # deleting intensities
                            if is_completed(run.run_folder, 'delete_intensities'):
                                log.info('All intensities deleted')
                            else:
                                if runfolder_age > delete_intensities_older_than:
                                    delete_intensities_cmd = "find %s/Data/Intensities/ \( -name *_pos.txt -or -name *.cif -or -name *.filter -or -name *.bcl -or -name *.stats \) -delete" % run.run_folder
                                    setup_clean(run.run_folder, 'delete_intensities', delete_intensities_cmd)
                                    clean(run.run_folder, 'delete_intensities', options.dry_run)
                            # deleting thumbnails
                            if is_completed(run.run_folder, 'delete_thumbnails'):
                                log.info('All thumbnails deleted')
                            else:
                                if runfolder_age > delete_thumbnails_older_than:
                                    delete_thumbnails_cmd = "find %s/Thumbnail_Images/ -name *.jpg -delete" % run.run_folder
                                    setup_clean(run.run_folder, 'delete_thumbnails', delete_thumbnails_cmd)
                                    clean(run.run_folder, 'delete_thumbnails', options.dry_run)
                    else:
                        log.info('run folder %s not ready to be deleted' % run.run_folder)

                else:
                    log.debug('%s is present' % run.dont_delete)
            except:
                # per-run guard: log and carry on with the next run folder
                log.exception("Unexpected error")
                continue

    except:
        # boundary handler: log with traceback, then re-raise
        log.exception("Unexpected error")
        raise
Example no. 3
0
def main():
    """Run the auto-analysis daemon over all runs ready to be analysed.

    Parses the command-line options, takes a pid-file lock so only one
    instance runs at a time, then for every run folder to analyse: queries
    the LIMS, sets up and executes the pipelines, records the analysis
    process in the LIMS, synchronises data to the staging area and to the
    external ftp server, and queues the flow-cell for publishing.
    """
    # get the options
    parser = argparse.ArgumentParser()
    parser.add_argument("--processingdir", dest="processingdir", action="store", help="processing base directories e.g. '/processing'", required=True)
    parser.add_argument("--stagingdir", dest="stagingdir", action="store", help="staging base directories e.g. '/staging'", required=True)
    parser.add_argument("--clusterdir", dest="clusterdir", action="store", help="cluster base directory e.g. '/lustre/mib-cri/solexa/Runs'", default=None)

    parser.add_argument("--softdir", dest="softdir", action="store", default=cfg['SOFT_PIPELINE_PATH'], help="software base directory where pipelines are installed - default set to %s" % cfg['SOFT_PIPELINE_PATH'])
    parser.add_argument("--cluster", dest="cluster", action="store", help="cluster hostname e.g. %s" % cfg['CLUSTER_HOST'])

    parser.add_argument("--runfolder", dest="run_folder", action="store", help="run folder e.g. '130114_HWI-ST230_1016_D18MAACXX'")
    # .keys() is equivalent to the Python2-only .viewkeys() here and also
    # works on Python 3
    parser.add_argument("--step", dest="step", action="store", choices=list(cfg['PIPELINES_SETUP_OPTIONS'].keys()), help="pipeline step to choose from %s" % cfg['PIPELINES_ORDER'])
    parser.add_argument("--dry-run", dest="dry_run", action="store_true", default=False, help="use this option to not do any shell command execution, only report actions")
    parser.add_argument("--limsdev", dest="use_limsdev", action="store_true", default=False, help="Use the development LIMS url")
    parser.add_argument("--donot-run-pipelines", dest="donot_run_pipelines", action="store_true", default=False, help="use this option to DO NOT run the pipelines")
    parser.add_argument("--logfile", dest="logfile", action="store", default=None, help="File to print logging information")
    parser.add_argument("--nologemail", dest="nologemail", action="store_true", default=False, help="turn off sending log emails on error")

    parser.add_argument("--noalignment", dest="noalignment", action="store_true", default=False, help="turn off alignment pipeline completely")
    parser.add_argument("--local", dest="local", action="store_true", default=False, help="run all pipelines locally, overwriting default mode")

    options = parser.parse_args()

    # logging configuration
    log = logger.get_custom_logger(options.logfile, options.nologemail)

    # check if python script is already running (single-instance pid-file lock)
    pid = str(os.getpid())
    pidfile = "/tmp/autoanalysis_daemon.pid"
    if os.path.isfile(pidfile):
        log.info("%s already exists, exiting" % pidfile)
        sys.exit()
    else:
        # use open() in a 'with' block instead of the Python2-only file()
        # builtin, so the handle is closed deterministically
        with open(pidfile, 'w') as pf:
            pf.write(pid)

    # unset these variables if all pipelines should run locally
    if options.local:
        options.clusterdir = None
        options.cluster = None

    try:
        # loop over all runs that have a Sequencing.completed file in options.processingdir
        runs = auto_data.RunFolderList(options.processingdir, options.stagingdir, options.clusterdir, options.run_folder)
        # connect to lims
        glslims = auto_glslims.GlsLims(options.use_limsdev)
        for run in runs.runs_to_analyse:
            try:
                log.info(run.get_header())

                # are all sample fastq files attached in lims for this run?
                are_files_attached = glslims.are_fastq_files_attached(run.run_folder_name)
                # get external data when lane and sample fastq files are recorded in ClarityFiles DB
                external_data = glslims.find_external_data(run.run_folder_name)
                # is alignment active for this run?
                if options.noalignment:
                    is_alignment_active = False
                else:
                    is_alignment_active = glslims.is_alignment_active(run.run_folder_name)

                # setup and run pipelines
                pipelines = auto_pipelines.Pipelines(run, options.step, options.softdir, options.cluster, options.dry_run, options.use_limsdev, is_alignment_active, options.local)
                if not options.donot_run_pipelines:
                    pipelines.execute()

                # create auto analysis process in lims
                glslims.create_auto_pipeline_reports_process(run.run_folder_name)

                # synchronise data to staging area
                sync = auto_pipelines.Sync(run, options.dry_run)
                sync.execute()

                # synchronise external data to ftp server
                external = auto_pipelines.External(run, are_files_attached, external_data, options.dry_run)
                external.execute()

                # add flow-cell into the publishing queue
                glslims.publish_flowcell(run, are_files_attached)

            # 'except Exception as e' is the Python 2.6+/3 compatible
            # spelling of 'except Exception, e'; one failing run must not
            # stop the remaining ones
            except Exception as e:
                log.exception("Unexpected error")
                log.exception(e)
                continue
    except:
        # boundary handler: log with traceback, then re-raise
        log.exception("Unexpected error")
        raise
    finally:
        # always release the pid-file lock, even on failure
        os.unlink(pidfile)
Example no. 4
0
def main():
    """Move published run folders to the trash directory and empty old trash.

    Parses the command-line options, moves every published run folder under
    --basedir (unless its dont.delete marker is present) into --trashdir,
    then permanently removes trashed run folders older than three days.
    """
    # get the options
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--basedir",
        dest="basedir",
        action="store",
        help="lustre base directory e.g. '/lustre/mib-cri/solexa/Runs'",
        required=True)
    parser.add_argument(
        "--trashdir",
        dest="trashdir",
        action="store",
        help="trash directory e.g. '/lustre/mib-cri/solexa/Trash_Runs'",
        required=True)
    parser.add_argument(
        "--runfolder",
        dest="run_folder",
        action="store",
        help="run folder e.g. '130114_HWI-ST230_1016_D18MAACXX'")
    parser.add_argument(
        "--dry-run",
        dest="dry_run",
        action="store_true",
        default=False,
        help="use this option to not do any shell command execution, only report actions")
    parser.add_argument(
        "--logfile",
        dest="logfile",
        action="store",
        default=False,
        help="File to print logging information")

    options = parser.parse_args()

    # logging configuration: log to file when --logfile given, default otherwise
    if options.logfile:
        log = logger.get_custom_logger(options.logfile)
    else:
        log = logger.get_custom_logger()

    try:
        # loop over all runs that have a Analysis.completed and Publishing.assigned and not dont.delete files in options.basedir
        runs = auto_data.RunFolderList(options.basedir, '', None,
                                       options.run_folder)
        for run in runs.published_runs:
            try:
                log.info(run.get_header())
                log.info('*** run folder move to trash')
                if os.path.exists(run.dont_delete):
                    log.info('%s is present' % run.dont_delete)
                else:
                    cmd = ['mv', run.run_folder, options.trashdir]
                    utils.run_bg_process(cmd, options.dry_run)
            # catch Exception (not a bare except) so KeyboardInterrupt and
            # SystemExit still propagate; one failing run must not stop the rest
            except Exception:
                log.exception("Unexpected error")
                continue

        # delete all run folders older than 3 days in options.trashdir
        # (run folder names look like YYMMDD_machine_number_flowcell)
        trash_run_folders = glob.glob("%s/??????_*_*_*" % options.trashdir)
        older = 60 * 60 * 24 * 3  # convert 3 days to seconds
        present = time.time()
        for run_folder in trash_run_folders:
            log.info(
                '~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~'
            )
            log.info('~~~ TRASH RUN: %s' % run_folder)
            log.info(
                '~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~'
            )
            try:
                if (present - os.path.getmtime(run_folder)) > older:
                    log.info('*** run folder removed')
                    cmd = ['rm', '-rf', run_folder]
                    utils.run_bg_process(cmd, options.dry_run)
            except Exception:
                log.exception("Unexpected error")
                continue
    except Exception:
        # boundary handler: log with traceback, then re-raise
        log.exception("Unexpected error")
        raise
Example no. 5
0
def main():
    # get the options
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--clusterdir",
        dest="clusterdir",
        action="store",
        help="cluster base directory e.g. '/lustre/mib-cri/solexa/Runs'",
        required=True)
    parser.add_argument("--processingdir",
                        dest="processingdir",
                        action="store",
                        help="processing base directories e.g. '/processing'",
                        required=True)
    parser.add_argument("--stagingdir",
                        dest="stagingdir",
                        action="store",
                        help="staging base directories e.g. '/staging'",
                        required=True)

    parser.add_argument("--processeddir",
                        dest="processeddir",
                        action="store",
                        default=os.path.join('processing', 'ProcessedRuns'),
                        help="processed runs directory on processing",
                        required=True)
    parser.add_argument("--trashdir",
                        dest="trashdir",
                        action="store",
                        default=os.path.join('lustre', 'mib-cri', 'solexa',
                                             'TrashRuns'),
                        help="trash runs directory on cluster",
                        required=True)

    parser.add_argument(
        "--folders",
        dest="folders",
        action="store",
        help="number of days to keep run folders - default set to 100",
        default=100,
        type=int)
    parser.add_argument(
        "--thumbnails",
        dest="thumbnails",
        action="store",
        help="number of days to keep thumbnails - default set to 90",
        default=90,
        type=int)
    parser.add_argument(
        "--intensities",
        dest="intensities",
        action="store",
        help="number of days to keep intensities - default set to 21",
        default=21,
        type=int)
    parser.add_argument(
        "--images",
        dest="images",
        action="store",
        help="number of days to keep images - default set to 14",
        default=14,
        type=int)

    parser.add_argument(
        "--runfolder",
        dest="run_folder",
        action="store",
        help="run folder e.g. '130114_HWI-ST230_1016_D18MAACXX'")
    parser.add_argument(
        "--dry-run",
        dest="dry_run",
        action="store_true",
        default=False,
        help=
        "use this option to not do any shell command execution, only report actions"
    )
    parser.add_argument("--dev-lims",
                        dest="use_dev_lims",
                        action="store_true",
                        default=False,
                        help="Use the development LIMS url")
    parser.add_argument("--logfile",
                        dest="logfile",
                        action="store",
                        default=False,
                        help="File to print logging information")
    parser.add_argument("--nologemail",
                        dest="nologemail",
                        action="store_true",
                        default=False,
                        help="turn off sending log emails on error")

    options = parser.parse_args()

    # logging configuration
    log = logger.get_custom_logger(options.logfile, options.nologemail)

    # setting up directories
    utils.create_directory(options.processeddir)
    if os.path.exists(options.clusterdir):
        utils.create_directory(options.trashdir)

    # setting-up time
    present = time.time()
    delete_thumbnails_older_than = convert_day(options.thumbnails)
    delete_images_older_than = convert_day(options.images)
    delete_intensities_older_than = convert_day(options.intensities)
    deleted_folders_older_than = convert_day(options.folders)

    try:
        # lims connection
        glslims = auto_glslims.GlsLims(options.use_dev_lims)
        # loop over all runs in options.processingdir
        runs = auto_data.RunFolderList(options.processingdir,
                                       options.stagingdir, options.clusterdir,
                                       options.run_folder, False)

        ### print run reports ...........................................................
        log.info(
            '********************************************************************************'
        )
        log.info(
            '*** RUN REPORTS ****************************************************************'
        )
        log.info(
            '********************************************************************************'
        )
        for run in runs.runs_to_analyse:
            log.info('TO ANALYSE  %s' % run.run_folder)
        for run in runs.unknown_runs:
            log.info('UNKNOWN     %s' % run.run_folder)
        log.info('--------------------')
        log.info(' COMPLETED RUNS: %s' % len(runs.completed_runs))
        log.info('    FAILED RUNS: %s' % len(runs.failed_runs))
        log.info('   UNKNOWN RUNS: %s' % len(runs.unknown_runs))
        log.info('--------------------')
        log.info('RUNS TO ANALYSE: %s' % len(runs.runs_to_analyse))
        log.info('  ANALYSED RUNS: %s' % len(runs.analysed_runs))
        log.info('    SYNCED RUNS: %s' % len(runs.synced_runs))
        log.info(' PUBLISHED RUNS: %s' % len(runs.published_runs))
        log.info('--------------------')

        ### update run status ...........................................................
        log.info(
            '********************************************************************************'
        )
        log.info(
            '*** UPDATE RUN STATUS **********************************************************'
        )
        log.info(
            '********************************************************************************'
        )
        for run in runs.all_runs:
            try:
                # add SequencingComplete.txt or SequencingFail.txt by retrieving info from lims on run
                if not run.is_sequencing_status_present():
                    log.info('*** %s' % run.run_folder_name)
                    is_sequencing_complete = glslims.is_sequencing_run_complete(
                        run.run_folder_name)
                    run.update_sequencing_status(is_sequencing_complete,
                                                 options.dry_run)
            except Exception, e:
                log.exception("Unexpected error")
                log.exception(e)
                continue

        ### manage runs .................................................................
        log.info(
            '********************************************************************************'
        )
        log.info(
            '*** MANAGE PUBLISHED RUNS ******************************************************'
        )
        log.info(
            '********************************************************************************'
        )
        # move published runs in cluster into options.trashdir and in processing into options.processeddir
        for run in runs.published_runs:
            try:
                if os.path.exists(run.ignore_me):
                    log.info('%s is present' % run.ignore_me)
                elif os.path.exists(run.dont_delete):
                    log.info('%s is present' % run.dont_delete)
                else:
                    cmd = ['mv', run.run_folder, options.processeddir]
                    utils.run_bg_process(cmd, options.dry_run)
                    log.info('*** run %s moved to %s' %
                             (run.run_folder_name, options.processeddir))
                    if os.path.exists(run.cluster_run_folder):
                        cmd = ['mv', run.cluster_run_folder, options.trashdir]
                        utils.run_bg_process(cmd, options.dry_run)
                        log.info('*** run %s on cluster moved to %s' %
                                 (run.run_folder_name, options.trashdir))
            except Exception, e:
                log.exception("Unexpected error")
                log.exception(e)
                continue