def sync(run_id, conf):
    """Synchronize a run.

    Arguments:
        run_id: the run id
        conf: configuration dictionary
    """

    start_time = time.time()
    common.log('INFO', 'Sync step: Starting', conf)

    bcl_data_path = conf[BCL_DATA_PATH_KEY]
    reports_data_base_path = conf[REPORTS_DATA_PATH_KEY]
    output_path = bcl_data_path + '/' + run_id

    # Check if rsync exists in PATH
    if not common.exists_in_path("rsync"):
        error("Can't find all needed commands in PATH env var",
              "Can't find all needed commands in PATH env var. Unable to find: rsync command.", conf)
        return False

    # Check if reports_data_path exists
    if not os.path.exists(reports_data_base_path):
        error("Report directory does not exist",
              "Report directory does not exist: " + reports_data_base_path, conf)
        return False

    # Check if there is enough space to store the reports
    if common.df(reports_data_base_path) < 10 * 1024 * 1024 * 1024:
        error("Not enough disk space to store aozan reports for run " + run_id,
              "Not enough disk space to store aozan reports for run " + run_id +
              '.\nNeed more than 10 Gb on ' + reports_data_base_path + '.', conf)
        return False

    # Do the synchronization
    if not partial_sync(run_id, True, conf):
        return False

    # Rename partial sync directory to final run BCL directory
    if os.path.exists(output_path + '.tmp'):
        os.rename(output_path + '.tmp', output_path)

    # Check used and free space
    df_in_bytes = common.df(bcl_data_path)
    du_in_bytes = common.du(output_path)
    df = df_in_bytes / (1024 * 1024 * 1024)
    du = du_in_bytes / (1024 * 1024 * 1024)

    common.log("WARNING", "Sync step: output disk free after sync: " + str(df_in_bytes), conf)
    common.log("WARNING", "Sync step: space used by sync: " + str(du_in_bytes), conf)

    duration = time.time() - start_time

    msg = 'Ending synchronization for run ' + run_id + '.\n' + \
          'Job finished at ' + common.time_to_human_readable(time.time()) + \
          ' without error in ' + common.duration_to_human_readable(duration) + '.\n\n' + \
          'Run output files (without .cif files) can be found in the following directory:\n ' + output_path

    # Add path to report if reports.url exists
    if common.is_conf_key_exists(REPORTS_URL_KEY, conf):
        msg += '\n\nRun reports can be found at the following location:\n ' + conf[REPORTS_URL_KEY] + '/' + run_id

    msg += '\n\nFor this task %.2f GB has been used and %.2f GB is still free.' % (du, df)

    common.send_msg('[Aozan] Ending synchronization for run ' + run_id + ' on ' +
                    common.get_instrument_name(run_id, conf), msg, False, conf)
    common.log('INFO', 'Sync step: successful in ' + common.duration_to_human_readable(duration), conf)

    return True
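# A minimal sketch of the kind of rsync invocation that partial_sync() (defined
# elsewhere in this module) presumably wraps: copying the run directory into a
# '<output_path>.tmp' staging directory that sync() later renames, while
# excluding .cif files, as the notification message above suggests. The helper
# name and the exact rsync options are assumptions, not the actual Aozan code.
def _example_partial_rsync(input_run_path, output_path):
    import subprocess

    # Trailing slashes make rsync copy directory contents, not the directory itself
    cmd = ['rsync', '-a', '--exclude=*.cif',
           input_run_path + '/', output_path + '.tmp/']

    # rsync returns 0 on success
    return subprocess.call(cmd) == 0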
# Init logger
try:
    Common.initLogger(conf[AOZAN_LOG_PATH_KEY], conf[AOZAN_LOG_LEVEL_KEY])
except AozanException as exp:
    common.exception_msg(exp, conf)

# Check main path file in configuration
if not common.check_configuration(conf, conf_file):
    common.log('SEVERE',
               'Aozan cannot be executed. Configuration is invalid or missing, some useful directories ' +
               'may be inaccessible.', conf)
    sys.exit(1)

# Check if the globally required programs are available in PATH
global_program_set = {"bash", "du", "touch", "chmod", "cp", "mv", "rm", "find", "tar"}
for program in global_program_set:
    if not common.exists_in_path(program):
        common.log('SEVERE',
                   "Can't find all needed commands in PATH env var. Unable to find: " + program + " command.",
                   conf)
        sys.exit(1)

# Check critical free space available
hiseq_run.send_mail_if_critical_free_space_available(conf)

lock_file_path = conf[LOCK_FILE_KEY]

# Run only if there is no lock
# if not os.path.exists(lock_file_path):
if not lock_file_exists(lock_file_path):
    try:
        # Create lock file
        create_lock_file(lock_file_path)
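# A minimal sketch, under stated assumptions, of what the lock_file_exists()
# and create_lock_file() helpers used above could look like: a marker-file
# protocol that prevents two Aozan instances from running concurrently. The
# bodies below are illustrative guesses, not the actual Aozan implementation.
def _example_lock_file_exists(lock_file_path):
    # The commented-out line above suggests this is (at least) a path check
    return os.path.exists(lock_file_path)

def _example_create_lock_file(lock_file_path):
    # Writing the current PID lets a stale lock be identified later (assumption)
    f = open(lock_file_path, 'w')
    f.write(str(os.getpid()))
    f.close()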
def recompress(run_id, conf):
    """Proceed to recompression of a run.

    Arguments:
        run_id: the run id
        conf: configuration dictionary
    """

    common.log('INFO', 'Recompress step: Starting', conf)

    # Check if the FASTQ root data directory exists
    if not common.is_dir_exists(FASTQ_DATA_PATH_KEY, conf):
        error("FASTQ data directory does not exist",
              "FASTQ data directory does not exist: " + conf[FASTQ_DATA_PATH_KEY], conf)
        return False

    start_time = time.time()
    fastq_input_dir = conf[FASTQ_DATA_PATH_KEY] + '/' + run_id

    # Initial disk usage, for comparison with the disk usage at the end of the step
    previous_du_in_bytes = common.du(fastq_input_dir)

    # Get information about the compression type
    compression_type = conf[RECOMPRESS_COMPRESSION_KEY]
    compression_level = conf[RECOMPRESS_COMPRESSION_LEVEL_KEY]
    compression_info_tuple = get_info_from_file_type(compression_type, compression_level)

    if compression_info_tuple is None:
        error("Unknown compression type",
              "Unknown compression type: " + compression_type, conf)
        return False

    (compression_type_result, output_file_extension, output_compression_command,
     output_decompression_command, compression_level_argument) = compression_info_tuple

    # The following list contains the types of files to recompress
    types_to_recompress = ["fastq.gz", "fastq"]

    # List of programs to check for in PATH before execution
    program_set = {"bash", "tee", "touch", "chmod", "md5sum",
                   output_compression_command, output_decompression_command}

    # Get the list of files to process
    input_files = []
    for extension in types_to_recompress:
        input_files.extend(list_files(fastq_input_dir, extension))
        simple_extension = os.path.splitext(extension)[-1][1:]
        extension_info_tuple = get_info_from_file_type(simple_extension)
        if extension_info_tuple is None:
            error("Unknown extension type",
                  "Unknown extension type: " + extension, conf)
            return False
        program_set.add(extension_info_tuple[3])

    # Actual program list check
    for program in program_set:
        if not common.exists_in_path(program):
            error("Can't find all needed commands in PATH env var",
                  "Can't find all needed commands in PATH env var. Unable to find: " + program + " command.", conf)
            return False

    # Create an executor to parallelize the recompression processes
    executor = Executors.newFixedThreadPool(int(conf[RECOMPRESS_THREADS_KEY]))
    workers = []

    # Process each fastq and fastq.gz file found recursively in the fastq directory
    for input_file in input_files:
        simple_extension = os.path.splitext(input_file)[-1][1:]

        # Get info about the type of the input file
        extension_info_tuple = get_info_from_file_type(simple_extension)
        if extension_info_tuple is None:
            error("Unknown extension type",
                  "Unknown extension type: " + simple_extension, conf)
            return False

        input_decompression_command = extension_info_tuple[3]

        # Get the file base name and build the output file name; if the file is
        # already a .fastq, it is ready to be used as the base input file
        base_input_file = input_file[0: input_file.index(".fastq") + 6]
        output_file = base_input_file + "." + output_file_extension

        # Skip if the output file already exists
        if not os.path.exists(output_file):
            # Create a worker, then execute it in a thread
            worker = Worker(input_file, output_file, input_decompression_command,
                            output_compression_command, output_decompression_command,
                            compression_level_argument,
                            common.is_conf_value_equals_true(RECOMPRESS_DELETE_ORIGINAL_FASTQ_KEY, conf))
            workers.append(worker)
            executor.execute(worker)
        else:
            common.log("WARNING", "Recompress step: Omitting processing file " + input_file +
                       ". The associated output file " + output_file + " already exists.", conf)

    # Wait for all threads to finish
    executor.shutdown()
    while not executor.isTerminated():
        time.sleep(1)

    # Check if any worker ended in error
    for worker in workers:
        if not worker.is_successful():
            error(worker.get_error_message(), worker.get_long_error_message(), conf)
            return False

    # Check the new disk usage
    df_in_bytes = common.df(fastq_input_dir)
    du_in_bytes = common.du(fastq_input_dir)
    previous_du = previous_du_in_bytes / (1024 * 1024)
    df = df_in_bytes / (1024 * 1024 * 1024)
    du = du_in_bytes / (1024 * 1024)

    common.log("WARNING", "Recompress step: output disk free after step: " + str(df_in_bytes), conf)
    common.log("WARNING", "Recompress step: space previously used: " + str(previous_du_in_bytes), conf)
    common.log("WARNING", "Recompress step: space now used by step: " + str(du_in_bytes), conf)

    duration = time.time() - start_time

    msg = 'Ending recompression for run ' + run_id + '.' + \
          '\nJob finished at ' + common.time_to_human_readable(time.time()) + \
          ' without error in ' + common.duration_to_human_readable(duration) + '. '

    msg += '\n\nAfter the recompress step, the FASTQ folder is now %.2f MB (previously %.2f MB) and %.2f GB is still free.' % (
        du, previous_du, df)

    common.send_msg('[Aozan] Ending recompress for run ' + run_id + ' on ' +
                    common.get_instrument_name(run_id, conf), msg, False, conf)
    common.log('INFO', 'Recompress step: successful in ' + common.duration_to_human_readable(duration), conf)

    return True
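# A minimal sketch of the 5-tuple that get_info_from_file_type() is unpacked
# into in recompress() above. The table entries and the level-argument format
# are assumptions inferred from how the fields are used there (index 3 is the
# decompression command; the level argument is passed to the compression
# command), not the actual Aozan lookup.
_EXAMPLE_FILE_TYPE_INFO = {
    # type: (type name, output extension, compression cmd, decompression cmd, level arg)
    'gz':    ('gz',    'fastq.gz',  'gzip',  'gunzip',  ''),
    'bz2':   ('bz2',   'fastq.bz2', 'bzip2', 'bunzip2', ''),
    'fastq': ('fastq', 'fastq',     'cat',   'cat',     ''),
}

def _example_get_info_from_file_type(file_type, compression_level=None):
    # Returns None for unknown types, which callers report as an error
    info = _EXAMPLE_FILE_TYPE_INFO.get(file_type)
    if info is None:
        return None
    if compression_level:
        # Turn e.g. '9' into the command-line argument '-9' (assumption)
        info = info[:4] + ('-' + str(compression_level),)
    return info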