def GetProjectNamePathRunID():
    """returns the per-run sentinel path along with the main project output directories"""
    results_path = params.GetConfigReader().get('RESULTS', 'results_path')
    cancer_type = params.GetCancerType()

    cancer_dir_path = "/".join([results_path, cancer_type])
    haplotype_path = "/".join([cancer_dir_path, "haplotypedir"])
    tmpbams_path = "/".join([cancer_dir_path, "tmpbams"])
    finalbams_path = "/".join([cancer_dir_path, "finalbams"])

    sentinel_path = CheckPath(cancer_dir_path + '/' + params.GetProjectName() +
                              "_" + rid.GetRunID() + '/sentinels/')

    return (sentinel_path, results_path, haplotype_path, cancer_dir_path,
            tmpbams_path, finalbams_path)
def GetScriptPath(sample_id, software):
    """returns the path used to store generated scripts"""
    results_path = configReader.get('RESULTS', 'results_path')
    cancer_type = params.GetCancerType()
    cancer_dir_path = "/".join([results_path, cancer_type])

    script_path = (cancer_dir_path + '/' + params.GetProjectName() +
                   '_' + rid.GetRunID() + '/scripts/')
    try:
        os.makedirs(script_path)
    except OSError:
        # directory already exists
        pass
    return script_path
def RunTask(command, num_cpu, mem_usage, sample_id, software):
    """assigns a task to the cluster and returns bookkeeping for the submitted job"""
    results_path = configReader.get('RESULTS', 'results_path')
    cancer_type = params.GetCancerType()
    cancer_dir_path = "/".join([results_path, cancer_type])

    # stagger submissions so concurrent samples do not hit the scheduler at once
    time.sleep(random.uniform(params.num_samples, params.num_samples + 20))

    log_path = (cancer_dir_path + '/' + params.GetProjectName() +
                '_' + rid.GetRunID() + '/logs/')
    try:
        os.makedirs(log_path)
    except OSError:
        pass

    cluster_cmd = params.GetQsubStatement().format(log_path, str(num_cpu), mem_usage)
    process = []
    subprocess.call('chmod +x ' + command, shell=True)
    cluster_cmd = cluster_cmd + ' ' + command

    # throttle submission: count the user's queued jobs
    # (subtracting the two qstat header lines) and wait while at the limit
    max_jobs = int(configReader.get('CLUSTER', 'max_jobs'))
    current = 0
    jobcheck = subprocess.Popen('qstat -u {0} | wc -l'.format(getpass.getuser()),
                                stdout=subprocess.PIPE, shell=True)
    for line in jobcheck.stdout:
        current = int(line.strip()) - 2
    while current >= max_jobs:
        time.sleep(10)
        jobcheck = subprocess.Popen('qstat -u {0} | wc -l'.format(getpass.getuser()),
                                    stdout=subprocess.PIPE, shell=True)
        for line in jobcheck.stdout:
            current = int(line.strip()) - 2

    task = subprocess.Popen(cluster_cmd, shell=True)
    process.append(task)
    process.append(cluster_cmd)
    process.append(0)
    process.append(log_path + os.path.basename(command))
    return process
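
# Illustrative sketch only: RunTask() expects the qsub statement returned by
# params.GetQsubStatement() to carry three positional placeholders, which it fills as
# (log_path, num_cpu, mem_usage) before appending the script to run. The exact statement
# and option name are site specific and live in the user's config; a hypothetical SGE
# template compatible with the .format() call above would look like:
#
#   [CLUSTER]
#   qsub_statement: qsub -V -o {0} -e {0} -pe smp {1} -l h_vmem={2}
#   max_jobs: 200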
def GetProjectPaths(results_path):
    cancer_type = params.GetCancerType()
    if cancer_type:
        cancer_dir_path = "/".join([results_path, cancer_type])
    else:
        cancer_dir_path = results_path

    haplotype_path = "/".join([cancer_dir_path, "haplotypedir"])
    log_path = "/".join([cancer_dir_path, "logs"])
    tmpbams_path = "/".join([cancer_dir_path, "tmpbams"])
    finalbams_path = "/".join([cancer_dir_path, "finalbams"])
    logfile = "/".join([log_path, "debug.log"])

    createDirectory(results_path)
    createDirectory(cancer_dir_path)
    createDirectory(haplotype_path)
    createDirectory(log_path)
    createDirectory(tmpbams_path)
    createDirectory(finalbams_path)

    return (haplotype_path, cancer_dir_path, tmpbams_path, finalbams_path,
            log_path, logfile)
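
# For reference, GetProjectPaths() lays out the results tree as follows (directory names
# taken directly from the joins above; <results_path> comes from the config and
# <cancer_type> is omitted when unset):
#
#   <results_path>/<cancer_type>/
#       haplotypedir/
#       logs/          (holds debug.log)
#       tmpbams/
#       finalbams/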
def GetLogFile(logger_name):
    """returns the shared logger proxy and mutex for the batch log file"""
    results_path = configReader.get('RESULTS', 'results_path')
    cancer_type = params.GetCancerType()
    cancer_dir_path = "/".join([results_path, cancer_type])

    log = []
    log_file_path = (cancer_dir_path + '/' + params.GetProjectName() +
                     '_' + rid.GetRunID() + '/batch_logs/')
    log_file = (log_file_path + 'pipeline_batch_log_' +
                params.GetProjectName() + '_' + rid.GetRunID() + '.log')
    try:
        os.makedirs(log_file_path)
    except OSError:
        pass

    logger_args = {}
    logger_args["file_name"] = log_file
    logger_args["level"] = logging.DEBUG
    logger_args["rotating"] = True
    logger_args["maxBytes"] = 10000000
    logger_args["backupCount"] = 10
    logger_args["formatter"] = "[%(asctime)s] [%(name)s] [%(levelname)s] - %(message)s"

    logger_proxy, logger_mutex = make_shared_logger_and_proxy(
        setup_std_shared_logger, logger_name, logger_args)
    log.append(logger_proxy)
    log.append(logger_mutex)
    return log
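
# Minimal usage sketch, assuming the ruffus proxy-logger convention that
# make_shared_logger_and_proxy() follows: log records are written through the proxy while
# holding the mutex so that concurrently running tasks do not interleave messages.
# The logger name below is arbitrary and only for illustration.
#
#   logger_proxy, logger_mutex = GetLogFile("pipeline_batch")
#   with logger_mutex:
#       logger_proxy.info("batch started")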