Ejemplo n.º 1
0
def main():
    """Log in to Kive, report progress for each selected run, and
    optionally download the results.
    """
    logger = init_logging(os.path.join(home, 'kive_download.log'),
                          file_log_level=logging.INFO,
                          console_log_level=logging.INFO)
    args = parse_args()

    logger.info('Starting.')
    kive = kive_login(kive_server_url, kive_user, kive_password)
    # Select runs either by batch date or by a start-time window.
    if args.batchdate is None:
        runs = find_old_runs(kive,
                             startafter=args.startafter,
                             startbefore=args.startbefore)
    else:
        runs = find_batch_runs(kive, args.batchdate, args.batchsize)
    unfinished_count = 0
    for sample_name, run in runs:
        progress = run.json.get('run_progress')
        start_time = progress['start'] if progress else None
        end_time = progress['end'] if progress else None
        if end_time is None:
            # No end time recorded means the run hasn't finished yet.
            unfinished_count += 1
        print(run.json['display_name'])
        print('  ' + sample_name)
        print('  {} - {}'.format(start_time, end_time))
    if args.workfolder or args.resultfolder:
        download_results(runs, args.resultfolder, args.workfolder)
    logger.info('%d runs found (%d unfinished).', len(runs), unfinished_count)
Ejemplo n.º 2
0
def main():
    """Log in to Kive, print progress for each matching run, and
    optionally download result files.
    """
    logger = init_logging(
        os.path.join(home, "kive_download.log"), file_log_level=logging.INFO, console_log_level=logging.INFO
    )
    args = parse_args()

    logger.info("Starting.")
    kive = kive_login(kive_server_url, kive_user, kive_password)
    # Batch-date selection takes precedence over the time-window query.
    runs = (
        find_batch_runs(kive, args.batchdate, args.batchsize)
        if args.batchdate is not None
        else find_old_runs(kive, startafter=args.startafter, startbefore=args.startbefore)
    )
    unfinished_count = 0
    for sample_name, run in runs:
        progress = run.json.get("run_progress")
        if not progress:
            start_time = end_time = None
        else:
            start_time = progress["start"]
            end_time = progress["end"]
        if end_time is None:
            # Missing end time: the run is still in flight.
            unfinished_count += 1
        print(run.json["display_name"])
        print("  " + sample_name)
        print("  {} - {}".format(start_time, end_time))
    if args.workfolder or args.resultfolder:
        download_results(runs, args.resultfolder, args.workfolder)
    logger.info("%d runs found (%d unfinished).", len(runs), unfinished_count)
Ejemplo n.º 3
0
def main():
    """Drive the full MiSeq pipeline for one run folder.

    Cleans stale working files, verifies Open MPI is available, then runs
    the filter, mapping, counting, and summarizing phases through mpirun.
    Exits with status 1 if any phase fails.
    """
    args = parseOptions()
    log_file = "{}/run.log".format(args.run_folder)
    logger = miseq_logging.init_logging(log_file,
                                        file_log_level=logging.DEBUG,
                                        console_log_level=logging.INFO)
    logger.info('Start processing run %s', args.run_folder)
    if args.clean:
        logger.info('Clean mode ON')
    else:
        logger.info('Clean mode OFF')

    try:
        logger.info('Removing old working files')
        # Suffixes that survive cleanup -- except the censored/unmapped
        # FASTQ intermediates, which are always removed even though they
        # end in '.fastq'.
        excluded_files = ('.fastq',
                          'SampleSheet.csv',
                          '.launch',
                          'MISEQ_MONITOR_OUTPUT.log',
                          'run.log',
                          'quality.csv')
        always_removed = ('unmapped1.fastq',
                          'unmapped2.fastq',
                          'censored1.fastq',
                          'censored2.fastq')
        for f in glob(args.run_folder + '/*'):
            # str.endswith accepts a tuple, replacing the manual loop.
            is_excluded = (f.endswith(excluded_files) and
                           not f.endswith(always_removed))
            if not is_excluded:
                if os.path.isdir(f):
                    shutil.rmtree(f)
                else:
                    os.remove(f)

        # Check for Open MPI; fall back to loading the environment module.
        prefix = ''
        expected_version = 'Open MPI'
        version = check_mpi_version(prefix)
        if expected_version not in version:
            prefix = 'module load openmpi/gnu && '
            version = check_mpi_version(prefix)
            if expected_version not in version:
                sys.exit("Couldn't find Open MPI:\n{}".format(version))
        monitor_path = os.path.abspath(os.path.dirname(__file__))

        base_args = ['mpirun',
                     '-np',
                     '1',
                     '--hostfile',
                     os.path.join(monitor_path, 'hostfile'),
                     os.path.join(monitor_path, 'sample_pipeline.py'),
                     args.run_folder]

        if args.mode is not None:
            base_args.append(args.mode)
        base_args.append('--phase')

        def build_command(phase, process_count=None):
            # Copy base_args, optionally overriding the -np value
            # (index 2), then append the phase name.
            phase_args = base_args[:]
            if process_count is not None:
                phase_args[2] = str(process_count)
            phase_args.append(phase)
            return prefix + ' '.join(phase_args)

        # shell=True is required because prefix may contain 'module load &&'.
        subprocess.check_call(build_command('filter'), shell=True)
        subprocess.check_call(build_command('mapping',
                                            settings.mapping_processes),
                              shell=True)
        subprocess.check_call(build_command('counting',
                                            settings.counting_processes),
                              shell=True)
        subprocess.check_call(build_command('summarizing'), shell=True)

        if args.clean:
            # remove intermediate files
            logger.info('Removing large working files, clean mode')
            files_to_remove = glob(args.run_folder + '/*.prelim.csv')
            files_to_remove += glob(args.run_folder + '/*.remap.csv')
            files_to_remove += glob(args.run_folder + '/*.aligned.csv')
            files_to_remove += glob(args.run_folder + '/*.censored?.fastq')
            for f in files_to_remove:
                os.remove(f)

        logger.info('Finished processing run %s', args.run_folder)
    except Exception:
        # Narrowed from a bare 'except:' so the sys.exit() above and
        # KeyboardInterrupt propagate instead of being logged as failures.
        logger.error('Failed to process run %s', args.run_folder, exc_info=True)
        sys.exit(1)
Ejemplo n.º 4
0
def main():
    """Process one MiSeq run folder end to end.

    Removes stale working files, locates Open MPI (loading the module if
    necessary), and executes the filter, mapping, counting, and summarizing
    phases via mpirun. Exits with status 1 on any failure.
    """
    args = parseOptions()
    log_file = "{}/run.log".format(args.run_folder)
    logger = miseq_logging.init_logging(log_file,
                                        file_log_level=logging.DEBUG,
                                        console_log_level=logging.INFO)
    logger.info('Start processing run %s', args.run_folder)
    if args.clean:
        logger.info('Clean mode ON')
    else:
        logger.info('Clean mode OFF')

    try:
        logger.info('Removing old working files')
        # Kept suffixes; the unmapped/censored FASTQ intermediates are
        # deleted anyway despite matching '.fastq'.
        excluded_files = ('.fastq', 'SampleSheet.csv', '.launch',
                          'MISEQ_MONITOR_OUTPUT.log', 'run.log', 'quality.csv')
        always_removed = ('unmapped1.fastq', 'unmapped2.fastq',
                          'censored1.fastq', 'censored2.fastq')
        for f in glob(args.run_folder + '/*'):
            # Tuple form of endswith replaces the inner suffix loop.
            is_excluded = (f.endswith(excluded_files)
                           and not f.endswith(always_removed))
            if not is_excluded:
                if os.path.isdir(f):
                    shutil.rmtree(f)
                else:
                    os.remove(f)

        # Check for Open MPI, retrying after loading the cluster module.
        prefix = ''
        expected_version = 'Open MPI'
        version = check_mpi_version(prefix)
        if expected_version not in version:
            prefix = 'module load openmpi/gnu && '
            version = check_mpi_version(prefix)
            if expected_version not in version:
                sys.exit("Couldn't find Open MPI:\n{}".format(version))
        monitor_path = os.path.abspath(os.path.dirname(__file__))

        base_args = [
            'mpirun', '-np', '1', '--hostfile',
            os.path.join(monitor_path, 'hostfile'),
            os.path.join(monitor_path, 'sample_pipeline.py'), args.run_folder
        ]

        if args.mode is not None:
            base_args.append(args.mode)
        base_args.append('--phase')

        def phase_command(phase, process_count=None):
            # Clone base_args; index 2 is the '-np' process count.
            phase_args = base_args[:]
            if process_count is not None:
                phase_args[2] = str(process_count)
            phase_args.append(phase)
            return prefix + ' '.join(phase_args)

        # shell=True is needed so the 'module load ... &&' prefix works.
        subprocess.check_call(phase_command('filter'), shell=True)
        subprocess.check_call(
            phase_command('mapping', settings.mapping_processes), shell=True)
        subprocess.check_call(
            phase_command('counting', settings.counting_processes), shell=True)
        subprocess.check_call(phase_command('summarizing'), shell=True)

        if args.clean:
            # remove intermediate files
            logger.info('Removing large working files, clean mode')
            files_to_remove = glob(args.run_folder + '/*.prelim.csv')
            files_to_remove += glob(args.run_folder + '/*.remap.csv')
            files_to_remove += glob(args.run_folder + '/*.aligned.csv')
            files_to_remove += glob(args.run_folder + '/*.censored?.fastq')
            for f in files_to_remove:
                os.remove(f)

        logger.info('Finished processing run %s', args.run_folder)
    except Exception:
        # Was a bare 'except:', which swallowed SystemExit from the
        # sys.exit() above and KeyboardInterrupt; Exception lets those pass.
        logger.error('Failed to process run %s',
                     args.run_folder,
                     exc_info=True)
        sys.exit(1)
Ejemplo n.º 5
0
def main():
    """MPI worker entry point for one pipeline process.

    Each rank claims every process_count-th sample from the run folder's
    FASTQ files and executes the phases requested by ``args.phase``
    (filter, mapping, counting, summarizing, or all). Rank 0 alone
    performs the summarizing collation.
    """
    comm = MPI.COMM_WORLD  # @UndefinedVariable
    process_rank = comm.Get_rank()
    process_count = comm.Get_size()

    args = parseOptions(comm)
    log_file = "{}/pipeline{}.log".format(args.run_folder, process_rank)
    logger = miseq_logging.init_logging(log_file,
                                        file_log_level=logging.DEBUG,
                                        console_log_level=logging.INFO)
    logger.info('Start processing run %s, rank %d',
                args.run_folder,
                process_rank)

    if args.mode is not None:
        run_info = None
    else:
        # Mode 'rU' was removed in Python 3.11 (ValueError); plain text
        # mode 'r' already performs universal-newline translation.
        with open(args.run_folder + '/SampleSheet.csv', 'r') as sample_sheet:
            logger.debug("sample_sheet_parser({})".format(sample_sheet))
            run_info = sample_sheet_parser(sample_sheet)
            args.mode = run_info['Description']

    fastq_samples = []
    fastq_files = glob(args.run_folder + '/*_R1_001.fastq')
    for i, fastq in enumerate(fastq_files):
        if i % process_count != process_rank:
            # skip samples that are assigned to other worker processes
            continue

        sample_info = SampleInfo(fastq)

        # verify this sample is in SampleSheet.csv
        if run_info and sample_info.key not in run_info['Data']:
            logger.error(
                '{} not in SampleSheet.csv - cannot map this sample'.format(
                    sample_info.key))
            continue

        fastq_samples.append(sample_info)

    def launch_callback(command):
        logger.info("Launching {!r}".format(command))

    worker = Worker(launch_callback=launch_callback,
                    working_path=args.run_folder,
                    are_temp_folders_deleted=are_temp_folders_deleted,
                    logger=logger)

    if args.phase in ('filter', 'all'):
        filter_quality(args.run_folder, worker)

    if args.phase in ('mapping', 'all'):
        map_samples(args.run_folder, fastq_samples, worker)

    if args.phase in ('counting', 'all'):
        count_samples(fastq_samples, worker, args)

    if args.phase in ('summarizing', 'all') and process_rank == 0:
        collate_results(fastq_samples, worker, args, logger)

    # FIXME: this log message gets sent before workers start
    logger.info('Finish processing run %s, rank %d',
                args.run_folder,
                process_rank)