def main():
    """Entry point: log in to Kive, list matching runs, and optionally
    download their results.
    """
    logger = init_logging(os.path.join(home, 'kive_download.log'),
                          file_log_level=logging.INFO,
                          console_log_level=logging.INFO)
    args = parse_args()
    logger.info('Starting.')
    kive = kive_login(kive_server_url, kive_user, kive_password)

    # An explicit batch date takes priority over a start-date window.
    if args.batchdate is not None:
        runs = find_batch_runs(kive, args.batchdate, args.batchsize)
    else:
        runs = find_old_runs(kive,
                             startafter=args.startafter,
                             startbefore=args.startbefore)

    pending = 0
    for sample_name, run in runs:
        progress = run.json.get('run_progress')
        started = progress['start'] if progress else None
        ended = progress['end'] if progress else None
        if ended is None:
            pending += 1
        print(run.json['display_name'])
        print(' ' + sample_name)
        print(' {} - {}'.format(started, ended))

    if args.workfolder or args.resultfolder:
        download_results(runs, args.resultfolder, args.workfolder)
    logger.info('%d runs found (%d unfinished).', len(runs), pending)
def main():
    """Query Kive for runs, print a progress report for each, and
    download results when an output folder was requested.
    """
    logger = init_logging(
        os.path.join(home, "kive_download.log"),
        file_log_level=logging.INFO,
        console_log_level=logging.INFO)
    args = parse_args()
    logger.info("Starting.")
    session = kive_login(kive_server_url, kive_user, kive_password)

    # Choose the run query: a batch date beats the start-date window.
    runs = (find_batch_runs(session, args.batchdate, args.batchsize)
            if args.batchdate is not None
            else find_old_runs(session,
                               startafter=args.startafter,
                               startbefore=args.startbefore))

    unfinished = 0
    for sample_name, run in runs:
        progress = run.json.get("run_progress")
        if progress:
            start_time, end_time = progress["start"], progress["end"]
        else:
            start_time = end_time = None
        # A missing end time means the run has not finished yet.
        unfinished += end_time is None
        print(run.json["display_name"])
        print(" " + sample_name)
        print(" {} - {}".format(start_time, end_time))

    if args.workfolder or args.resultfolder:
        download_results(runs, args.resultfolder, args.workfolder)
    logger.info("%d runs found (%d unfinished).", len(runs), unfinished)
def main():
    """Drive one MiSeq run folder through the filter, mapping, counting and
    summarizing pipeline phases via mpirun.

    Fix: the error handler was a bare ``except:``, which also caught the
    ``SystemExit`` raised by ``sys.exit()`` below and mis-reported a missing
    Open MPI installation as a pipeline failure; narrowed to ``Exception``.
    """
    args = parseOptions()
    log_file = "{}/run.log".format(args.run_folder)
    logger = miseq_logging.init_logging(log_file,
                                        file_log_level=logging.DEBUG,
                                        console_log_level=logging.INFO)
    logger.info('Start processing run %s', args.run_folder)
    if args.clean:
        logger.info('Clean mode ON')
    else:
        logger.info('Clean mode OFF')
    try:
        logger.info('Removing old working files')
        # Inputs and bookkeeping files that must survive a re-run.
        excluded_files = ('.fastq',
                          'SampleSheet.csv',
                          '.launch',
                          'MISEQ_MONITOR_OUTPUT.log',
                          'run.log',
                          'quality.csv')
        old_files = glob(args.run_folder + '/*')
        for f in old_files:
            is_excluded = False
            for ending in excluded_files:
                if f.endswith(ending):
                    # These FASTQs are pipeline intermediates that will be
                    # regenerated, so they are deleted despite ending in
                    # '.fastq'.
                    is_excluded = not (f.endswith('unmapped1.fastq') or
                                       f.endswith('unmapped2.fastq') or
                                       f.endswith('censored1.fastq') or
                                       f.endswith('censored2.fastq'))
                    break
            if not is_excluded:
                if os.path.isdir(f):
                    shutil.rmtree(f)
                else:
                    os.remove(f)

        # Check for Open MPI; if it is not on the PATH, retry after loading
        # the environment module.
        prefix = ''
        expected_version = 'Open MPI'
        version = check_mpi_version(prefix)
        if expected_version not in version:
            prefix = 'module load openmpi/gnu && '
            version = check_mpi_version(prefix)
            if expected_version not in version:
                sys.exit("Couldn't find Open MPI:\n{}".format(version))

        monitor_path = os.path.abspath(os.path.dirname(__file__))
        base_args = ['mpirun',
                     '-np',
                     '1',
                     '--hostfile',
                     os.path.join(monitor_path, 'hostfile'),
                     os.path.join(monitor_path, 'sample_pipeline.py'),
                     args.run_folder]
        if args.mode is not None:
            base_args.append(args.mode)
        base_args.append('--phase')

        filter_args = base_args[:]
        filter_args.append('filter')
        filter_command = prefix + ' '.join(filter_args)
        mapping_args = base_args[:]
        mapping_args[2] = str(settings.mapping_processes)  # replaces '-np 1' count
        mapping_args.append('mapping')
        mapping_command = prefix + ' '.join(mapping_args)
        counting_args = base_args[:]
        counting_args[2] = str(settings.counting_processes)
        counting_args.append('counting')
        counting_command = prefix + ' '.join(counting_args)
        summarizing_args = base_args[:]
        summarizing_args.append('summarizing')
        summarizing_command = prefix + ' '.join(summarizing_args)

        # shell=True is required so the optional 'module load ... &&' prefix
        # is interpreted by the shell; all arguments are locally built, not
        # user-supplied.
        subprocess.check_call(filter_command, shell=True)
        subprocess.check_call(mapping_command, shell=True)
        subprocess.check_call(counting_command, shell=True)
        subprocess.check_call(summarizing_command, shell=True)

        if args.clean:
            # remove intermediate files
            logger.info('Removing large working files, clean mode')
            files_to_remove = glob(args.run_folder + '/*.prelim.csv')
            files_to_remove += glob(args.run_folder + '/*.remap.csv')
            files_to_remove += glob(args.run_folder + '/*.aligned.csv')
            files_to_remove += glob(args.run_folder + '/*.censored?.fastq')
            for f in files_to_remove:
                os.remove(f)
        logger.info('Finished processing run %s', args.run_folder)
    except Exception:
        logger.error('Failed to process run %s', args.run_folder, exc_info=True)
        exit(1)
def main():
    """Run the full sample pipeline over one MiSeq run folder.

    Fixes: the bare ``except:`` also caught ``SystemExit`` (raised by
    ``sys.exit`` when Open MPI is missing) and mislabelled it a pipeline
    failure — narrowed to ``Exception``.  The monolithic body is split
    into single-purpose helpers.
    """
    args = parseOptions()
    log_file = "{}/run.log".format(args.run_folder)
    logger = miseq_logging.init_logging(log_file,
                                        file_log_level=logging.DEBUG,
                                        console_log_level=logging.INFO)
    logger.info('Start processing run %s', args.run_folder)
    logger.info('Clean mode ON' if args.clean else 'Clean mode OFF')
    try:
        logger.info('Removing old working files')
        _remove_old_working_files(args.run_folder)
        prefix = _find_mpi_prefix()
        for command in _build_phase_commands(prefix, args):
            # shell=True so the optional 'module load ... &&' prefix works;
            # every token of the command is built locally, not user input.
            subprocess.check_call(command, shell=True)
        if args.clean:
            _remove_large_working_files(args.run_folder, logger)
        logger.info('Finished processing run %s', args.run_folder)
    except Exception:
        logger.error('Failed to process run %s', args.run_folder, exc_info=True)
        exit(1)


def _remove_old_working_files(run_folder):
    """Delete stale pipeline outputs, keeping inputs and bookkeeping files."""
    excluded_files = ('.fastq',
                      'SampleSheet.csv',
                      '.launch',
                      'MISEQ_MONITOR_OUTPUT.log',
                      'run.log',
                      'quality.csv')
    # Intermediate FASTQs are regenerated, so delete them despite '.fastq'.
    regenerated = ('unmapped1.fastq', 'unmapped2.fastq',
                   'censored1.fastq', 'censored2.fastq')
    for f in glob(run_folder + '/*'):
        is_excluded = False
        for ending in excluded_files:
            if f.endswith(ending):
                is_excluded = not f.endswith(regenerated)
                break
        if not is_excluded:
            if os.path.isdir(f):
                shutil.rmtree(f)
            else:
                os.remove(f)


def _find_mpi_prefix():
    """Return a shell prefix that makes Open MPI available, or exit.

    Tries the bare environment first, then the 'openmpi/gnu' module.
    """
    prefix = ''
    expected_version = 'Open MPI'
    version = check_mpi_version(prefix)
    if expected_version not in version:
        prefix = 'module load openmpi/gnu && '
        version = check_mpi_version(prefix)
        if expected_version not in version:
            sys.exit("Couldn't find Open MPI:\n{}".format(version))
    return prefix


def _build_phase_commands(prefix, args):
    """Build the four mpirun command lines, one per pipeline phase."""
    monitor_path = os.path.abspath(os.path.dirname(__file__))
    base_args = ['mpirun',
                 '-np',
                 '1',
                 '--hostfile',
                 os.path.join(monitor_path, 'hostfile'),
                 os.path.join(monitor_path, 'sample_pipeline.py'),
                 args.run_folder]
    if args.mode is not None:
        base_args.append(args.mode)
    base_args.append('--phase')
    commands = []
    for phase, processes in (('filter', None),
                             ('mapping', settings.mapping_processes),
                             ('counting', settings.counting_processes),
                             ('summarizing', None)):
        phase_args = base_args[:]
        if processes is not None:
            phase_args[2] = str(processes)  # overrides the '-np 1' count
        phase_args.append(phase)
        commands.append(prefix + ' '.join(phase_args))
    return commands


def _remove_large_working_files(run_folder, logger):
    """Clean mode: delete the large intermediate CSV and FASTQ files."""
    logger.info('Removing large working files, clean mode')
    files_to_remove = glob(run_folder + '/*.prelim.csv')
    files_to_remove += glob(run_folder + '/*.remap.csv')
    files_to_remove += glob(run_folder + '/*.aligned.csv')
    files_to_remove += glob(run_folder + '/*.censored?.fastq')
    for f in files_to_remove:
        os.remove(f)
def main():
    """Run this MPI rank's share of the pipeline phases for one run folder.

    Samples are distributed round-robin across ranks by index; only rank 0
    performs the summarizing phase.

    Fix: the sample sheet was opened with mode ``'rU'`` — the ``'U'`` flag
    was deprecated since Python 3.4 and removed in 3.11; plain ``'r'`` is
    equivalent (universal newlines are the text-mode default).
    """
    comm = MPI.COMM_WORLD  # @UndefinedVariable
    process_rank = comm.Get_rank()
    process_count = comm.Get_size()
    args = parseOptions(comm)
    log_file = "{}/pipeline{}.log".format(args.run_folder, process_rank)
    logger = miseq_logging.init_logging(log_file,
                                        file_log_level=logging.DEBUG,
                                        console_log_level=logging.INFO)
    logger.info('Start processing run %s, rank %d',
                args.run_folder,
                process_rank)

    if args.mode is not None:
        run_info = None
    else:
        # No explicit mode given: read it from the sample sheet instead.
        with open(args.run_folder + '/SampleSheet.csv', 'r') as sample_sheet:
            logger.debug("sample_sheet_parser({})".format(sample_sheet))
            run_info = sample_sheet_parser(sample_sheet)
        args.mode = run_info['Description']

    fastq_samples = []
    fastq_files = glob(args.run_folder + '/*_R1_001.fastq')
    for i, fastq in enumerate(fastq_files):
        if i % process_count != process_rank:
            # skip samples that are assigned to other worker processes
            continue
        sample_info = SampleInfo(fastq)

        # verify this sample is in SampleSheet.csv
        if run_info and sample_info.key not in run_info['Data']:
            # Lazy %-formatting: the message is only built if it is emitted.
            logger.error('%s not in SampleSheet.csv - cannot map this sample',
                         sample_info.key)
            continue
        fastq_samples.append(sample_info)

    def launch_callback(command):
        logger.info("Launching {!r}".format(command))

    worker = Worker(launch_callback=launch_callback,
                    working_path=args.run_folder,
                    are_temp_folders_deleted=are_temp_folders_deleted,
                    logger=logger)

    if args.phase in ('filter', 'all'):
        filter_quality(args.run_folder, worker)

    if args.phase in ('mapping', 'all'):
        map_samples(args.run_folder, fastq_samples, worker)

    if args.phase in ('counting', 'all'):
        count_samples(fastq_samples, worker, args)

    # Only one rank collates, to avoid duplicate summary output.
    if args.phase in ('summarizing', 'all') and process_rank == 0:
        collate_results(fastq_samples, worker, args, logger)

    # FIXME: this log message gets sent before workers start
    logger.info('Finish processing run %s, rank %d',
                args.run_folder,
                process_rank)