# Standard-library imports used by the schedulers below.
import datetime
import json
import os
import subprocess
import time

# Project-local modules. Config, the DEBUG flag, and helpers such as
# match_sbatch_history, the *_check functions, and the delete_* functions
# are assumed to be defined elsewhere in this package.
import cluster_stats
import requests_from_cistromeDB

def clean_up_after_completion():
    configpath = Config.configpath
    sample_queue = requests_from_cistromeDB.SampleQueue(configpath)
    sample_queue.read_local_queue()
    samples_to_process = sample_queue.get_local_queue()
    for gsmid, sample_info in samples_to_process.items():
        # guard against an empty ID, which would otherwise point the
        # delete helpers at the top of the data-collection directory
        if gsmid == '':
            gsmid = 'missing_id_do_not_delete_the_path'
        sample_path = os.path.join(
            Config.sys_config['paths']['data_collection_runs'], gsmid)
        cistrome_path = os.path.join(
            sample_path, Config.sys_config['paths']['cistrome_result'])
        # check transfers are complete and results have not yet been deleted
        if (transfer_complete_check(gsmid)
                and transfer_to_backup_complete_check(gsmid)
                and os.path.exists(cistrome_path)):
            delete_sra_files(gsmid)
            delete_fastq_files(gsmid)
            delete_sbatch_files(gsmid)
            delete_result_files(gsmid, complete=True)
    return

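# The *_complete_check helpers are defined elsewhere in the package; as a
# minimal sketch of the pattern they appear to follow, each stage would drop
# a marker file in the sample directory and the check would test for it.
# The marker filename below is hypothetical, not taken from the real helpers.
def _transfer_complete_check_sketch(gsmid):
    sample_path = os.path.join(
        Config.sys_config['paths']['data_collection_runs'], gsmid)
    # hypothetical marker file written once the rsync back home succeeds
    return os.path.exists(os.path.join(sample_path, 'transfer_complete.txt'))
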
def clean_up_failed_samples():
    configpath = Config.configpath
    partition = Config.sys_config['process_server']['partition']
    max_fails = int(Config.sys_config['process_server']['max_fails'])
    cluster_status = cluster_stats.ClusterStats(configpath)
    sample_queue = requests_from_cistromeDB.SampleQueue(configpath)
    sample_queue.read_local_queue()
    samples_to_process = sample_queue.get_local_queue()
    for gsmid, sample_info in samples_to_process.items():
        # NOTE: for CHIPS_CHECK we count how many times the check has *run*
        # rather than how many times it has failed; a sample whose check
        # keeps rerunning is itself a sign of trouble.
        if (sample_queue.get_sample_fail_count(
                sample_id=gsmid, info_key='SRA') >= max_fails
                or sample_queue.get_sample_fail_count(
                    sample_id=gsmid, info_key='CHIPS') >= max_fails
                or sample_queue.get_sample_status_count(
                    sample_id=gsmid, info_key='CHIPS_CHECK') >= max_fails):
            print(f'cleaning up failed sample {gsmid}')
            sample_queue.clear_sample_info(sample_id=gsmid, info_key='SRA')
            sample_queue.clear_sample_info(sample_id=gsmid, info_key='CHIPS')
            sample_queue.clear_sample_info(sample_id=gsmid,
                                           info_key='CHIPS_CHECK')
            delete_sbatch_files(gsmid)
            delete_sra_files(gsmid)
            delete_fastq_files(gsmid)
            delete_result_files(gsmid, complete=False)
            sample_queue.increment_sample_restart_count(sample_id=gsmid)
    sample_queue.write_local_queue()
    return

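# Per the NOTE above: get_sample_fail_count counts recorded failures for a
# job type, while get_sample_status_count counts every recorded status, i.e.
# how many times the job has run at all. Using the status count for
# CHIPS_CHECK treats "keeps rerunning" as a failure mode in its own right.
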
def setUp(self):
    sample_queue_path = './chips_test_dirs/cistrome_pipeline.conf'
    sample_json_file = './chips_test_dirs/test_collection.json'
    self.sample_queue = requests_from_cistromeDB.SampleQueue(sample_queue_path)
    with open(sample_json_file, 'r') as fp:
        self.sample_queue.requested_samples = json.load(fp)
    print(self.sample_queue.requested_samples)
    self.sample_queue.read_local_queue()

def check_chips_results():
    configpath = Config.configpath
    partition = Config.sys_config['process_server']['partition']
    cluster_status = cluster_stats.ClusterStats(configpath)
    sample_queue = requests_from_cistromeDB.SampleQueue(configpath)
    sample_queue.read_local_queue()
    samples_to_process = sample_queue.get_local_queue()
    for gsmid, sample_info in samples_to_process.items():
        sample_path = os.path.join(
            Config.sys_config['paths']['data_collection_runs'], gsmid)
        log_path = os.path.join(sample_path, f'chips_check_log_{gsmid}.txt')
        chips_yaml = os.path.join(sample_path, 'config.yaml')
        jobname = f'{gsmid}_chips_check'
        sbatch_path = os.path.join(
            Config.sys_config['paths']['data_collection_sbatch'],
            f'{jobname}.sbatch')
        print(gsmid)
        # skip if results have already been sent back
        if transfer_complete_check(gsmid):
            continue
        # skip if the chips run is not yet complete
        if not chips_complete_check(gsmid):
            continue
        # skip if the chips run has already been checked
        if chips_check_complete_check(gsmid):
            continue
        # respect the pending-job limit
        cluster_status.get_jobs_in_queue()
        if (cluster_status.get_pending_job_count() >
                int(Config.sys_config['process_server']['max_jobs_pending'])):
            break
        # skip if the job is already in the queue
        # (chips check job name: {ID}_chips_check)
        if cluster_status.is_job_name_in_queue(jobname):
            continue
        cmd = f'python check_chips.py -c {configpath} -i {gsmid}'
        sbatch_cmd = (f'python sbatch_header.py --cmd "{cmd}" --time 480 '
                      f'--mem 2000 --partition {partition} '
                      f'--jobname {jobname} --sbatchfile {sbatch_path} '
                      f'--log {log_path}')
        if not DEBUG:
            sbatch_cmd += ' --submit'
        subprocess.run(sbatch_cmd, shell=True)
        time.sleep(1)
    print(datetime.datetime.now())
    return

def update_cluster_runstats_in_local_queue():
    configpath = Config.configpath
    sample_queue = requests_from_cistromeDB.SampleQueue(configpath)
    sample_queue.read_local_queue()
    cluster_status = cluster_stats.ClusterStats(configpath)
    cluster_status.get_account_info()
    # update the record of sbatch jobs for each job type
    for suffix in ('sra', 'chips', 'chips_check'):
        job_status = match_sbatch_history(
            suffix=suffix,
            jobs_name=cluster_status.account_info['name'],
            jobs_status=cluster_status.account_info['status'],
            jobs_id=cluster_status.account_info['job_id'])
        for sampleid, job_type_status in job_status.items():
            sample_queue.set_sample_info(sample_id=sampleid,
                                         info_key=job_type_status['type'],
                                         info_val=job_type_status['status'])
    sample_queue.write_local_queue()

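# From its use above, match_sbatch_history appears to map a sample ID to the
# type and status of its matching sbatch job, e.g. (illustrative values, not
# taken from real accounting data):
#
#     {'GSM0000001': {'type': 'CHIPS', 'status': 'RUNNING'}}
#
# set_sample_info then records that status under the matching info_key in the
# local queue.
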
def update_samples_in_local_queue():
    configpath = Config.configpath
    sample_queue = requests_from_cistromeDB.SampleQueue(configpath)
    sample_queue.update_local_queue()

def transfer_to_backup_server():
    configpath = Config.configpath
    server = 'backup_server'
    cluster_status = cluster_stats.ClusterStats(configpath)
    sample_queue = requests_from_cistromeDB.SampleQueue(configpath)
    sample_queue.read_local_queue()
    samples_to_process = sample_queue.get_local_queue()
    max_backup_rsync = int(
        Config.sys_config['process_server']['max_jobs_rsync_backup'])
    max_jobs_running = int(
        Config.sys_config['process_server']['max_jobs_running'])
    max_jobs_pending = int(
        Config.sys_config['process_server']['max_jobs_pending'])
    partition = Config.sys_config['process_server']['partition']
    fp = open('schedule_rsync_backup_log.txt', 'a')
    print('rsync backup running:', datetime.datetime.now(), file=fp)
    for gsmid, sample_info in samples_to_process.items():
        if (chips_check_complete_check(gsmid)
                and not transfer_to_backup_complete_check(gsmid)):
            # respect the queue limits
            cluster_status.get_jobs_in_queue()
            if cluster_status.get_pending_job_count() >= max_jobs_pending:
                break
            if cluster_status.get_running_job_count() >= max_jobs_running:
                break
            # TODO: check how more than max_backup_rsync jobs can run
            # (slurm failed to report jobs?)
            n_backup_rsync_jobs = len([
                jobname
                for jobname in cluster_status.list_job_names_in_queue()
                if '_backup_rsync' in jobname
            ])
            if n_backup_rsync_jobs >= max_backup_rsync:
                break
            jobname = f'{gsmid}_backup_rsync'
            if cluster_status.is_job_name_in_queue(jobname):
                continue
            sample_path = os.path.join(
                Config.sys_config['paths']['data_collection_runs'], gsmid)
            log_path = os.path.join(sample_path,
                                    f'backup_rsync_log_{gsmid}.txt')
            sbatch_path = os.path.join(
                Config.sys_config['paths']['data_collection_sbatch'],
                f'{jobname}.sbatch')
            cmd = (f'python file_transfer_to_server.py -c {configpath} '
                   f'-i {gsmid} -s {server}')
            if not DEBUG:
                print(f'rsync {gsmid}:', datetime.datetime.now(), file=fp)
                sbatch_cmd = (f'python sbatch_header.py --cmd "{cmd}" '
                              f'--time 3600 --mem 1000 '
                              f'--partition {partition} --jobname {jobname} '
                              f'--sbatchfile {sbatch_path} --log {log_path} '
                              f'--submit')
                subprocess.run(sbatch_cmd, shell=True)
            # failure tracking (currently disabled): count consecutive
            # transfer_to_backup_complete_check failures, decrement on
            # success, and stop after MAX_BACKUP_FAILURES
            time.sleep(10)
    print(datetime.datetime.now())
    fp.flush()
    fp.close()
    return

def transfer_to_server():
    configpath = Config.configpath
    server = 'home_server'
    cluster_status = cluster_stats.ClusterStats(configpath)
    sample_queue = requests_from_cistromeDB.SampleQueue(configpath)
    sample_queue.read_local_queue()
    samples_to_process = sample_queue.get_local_queue()
    max_data_rsync = int(
        Config.sys_config['process_server']['max_jobs_rsync_data'])
    max_jobs_running = int(
        Config.sys_config['process_server']['max_jobs_running'])
    max_jobs_pending = int(
        Config.sys_config['process_server']['max_jobs_pending'])
    partition = Config.sys_config['process_server']['partition']
    fp = open('schedule_rsync_data_log.txt', 'a')
    print('rsync data running:', datetime.datetime.now(), file=fp)
    for gsmid, sample_info in samples_to_process.items():
        if (chips_check_complete_check(gsmid)
                and not transfer_complete_check(gsmid)):
            # respect the queue limits
            cluster_status.get_jobs_in_queue()
            if cluster_status.get_pending_job_count() >= max_jobs_pending:
                break
            if cluster_status.get_running_job_count() >= max_jobs_running:
                break
            n_rsync_jobs = len([
                jobname
                for jobname in cluster_status.list_job_names_in_queue()
                if '_data_rsync' in jobname
            ])
            if n_rsync_jobs >= max_data_rsync:
                break
            jobname = f'{gsmid}_data_rsync'
            if cluster_status.is_job_name_in_queue(jobname):
                continue
            sample_path = os.path.join(
                Config.sys_config['paths']['data_collection_runs'], gsmid)
            log_path = os.path.join(sample_path, f'data_rsync_log_{gsmid}.txt')
            sbatch_path = os.path.join(
                Config.sys_config['paths']['data_collection_sbatch'],
                f'{jobname}.sbatch')
            cmd = (f'python file_transfer_to_server.py -c {configpath} '
                   f'-i {gsmid} -s {server}')
            if not DEBUG:
                print(f'rsync {gsmid}:', datetime.datetime.now(), file=fp)
                sbatch_cmd = (f'python sbatch_header.py --cmd "{cmd}" '
                              f'--time 3600 --mem 1000 '
                              f'--partition {partition} --jobname {jobname} '
                              f'--sbatchfile {sbatch_path} --log {log_path} '
                              f'--submit')
                subprocess.run(sbatch_cmd, shell=True)
            else:
                print(cmd)
            time.sleep(10)
    print(datetime.datetime.now())
    fp.flush()
    fp.close()
    return

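# The schedulers above each build the sbatch_header.py command line by hand.
# A small helper could remove that duplication. This is an optional sketch,
# not part of the original module; it uses only the flags already seen in
# the calls above (--cmd, --time, --mem, --partition, --jobname,
# --sbatchfile, --log, --submit).
def build_sbatch_cmd(cmd, time_min, mem_mb, partition, jobname, sbatch_path,
                     log_path, submit=True):
    """Assemble an sbatch_header.py invocation string."""
    sbatch_cmd = (f'python sbatch_header.py --cmd "{cmd}" --time {time_min} '
                  f'--mem {mem_mb} --partition {partition} '
                  f'--jobname {jobname} --sbatchfile {sbatch_path} '
                  f'--log {log_path}')
    if submit:
        # without --submit the sbatch file is written but not queued
        sbatch_cmd += ' --submit'
    return sbatch_cmd
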
def setup_and_run_chips():
    fp = open('schedule_chips_log.txt', 'a')
    print('job stat update running:', datetime.datetime.now(), file=fp)
    update_cluster_runstats_in_local_queue()
    print('clean up failed samples:', datetime.datetime.now(), file=fp)
    clean_up_failed_samples()
    print('chips job submission running:', datetime.datetime.now(), file=fp)
    # processing differs between sample types
    sampletype_lookup = {
        'dnase': 'dnase',
        'atac': 'atac',
        'tf': 'tf',
        'h3k27ac': 'h3k27ac',
        'h3k4me3': 'h3k4me3'
    }
    max_jobs_pending = int(
        Config.sys_config['process_server']['max_jobs_pending'])
    max_restarts = int(Config.sys_config['process_server']['max_restarts'])
    configpath = Config.configpath
    partition = Config.sys_config['process_server']['partition']
    cluster_status = cluster_stats.ClusterStats(configpath)
    sample_queue = requests_from_cistromeDB.SampleQueue(configpath)
    sample_queue.read_local_queue()
    samples_to_process = sample_queue.get_local_queue()
    # TODO: confirm consistency between the words used to specify chips
    # sample types and the types in the sample request file
    for gsmid, sample_info in samples_to_process.items():
        # limit the number of restarts
        if sample_queue.get_sample_restart_count(
                sample_id=gsmid) >= max_restarts:
            continue
        # the fastq check-file must exist
        if not fastq_check(gsmid):
            continue
        # skip if the chips run is already complete
        if chips_complete_check(gsmid):
            continue
        # skip if results have already been sent back
        if transfer_complete_check(gsmid):
            continue
        # respect the pending-job limit
        cluster_status.get_jobs_in_queue()
        if cluster_status.get_pending_job_count() > max_jobs_pending:
            print(gsmid, 'too many jobs pending: break chips submission',
                  file=fp)
            break
        # skip if the job is already in the queue (chips job name: {ID}_chips)
        if cluster_status.is_job_name_in_queue(f'{gsmid}_chips'):
            continue
        species = sample_info['species']
        sampletype = sample_info['sampletype']
        broad = '--broad' if sample_info['broad'].lower() == 'true' else ''
        # TODO: move this lookup somewhere more appropriate
        sampletype = sampletype_lookup[sampletype.lower()]
        cmd = (f'python chips_job_submission.py -c {configpath} '
               f'--gsm {gsmid} --species {species} '
               f'--sampletype {sampletype} {broad} --submit')
        if DEBUG:
            print(gsmid)
            print(sample_info)
            print(cmd)
        else:
            subprocess.run(cmd, shell=True)
        print(gsmid, datetime.datetime.now(), file=fp)
        time.sleep(1)
    fp.close()

def download_from_sra():
    configpath = Config.configpath
    config = Config.sys_config
    partition = config['process_server']['partition']
    cluster_status = cluster_stats.ClusterStats(configpath)
    max_fastq_file_number = int(
        config['process_server']['max_fastq_file_number'])
    max_fails = int(config['process_server']['max_fails'])
    max_restarts = int(config['process_server']['max_restarts'])
    max_jobs_pending = int(config['process_server']['max_jobs_pending'])
    max_jobs_running = int(config['process_server']['max_jobs_running'])
    # TODO: re-enable the disk-space availability check
    # scratch = cluster_status.get_scratch_use()
    # if (scratch['quota'] - scratch['used']) < float(
    #         config['process_server']['min_disk_space_avail']):
    #     return
    sample_queue = requests_from_cistromeDB.SampleQueue(configpath)
    sample_queue.read_local_queue()
    samples_to_process = sample_queue.get_local_queue()
    fp = open('schedule_sra_log.txt', 'a')
    for gsmid, sample_info in samples_to_process.items():
        # don't download if there are already enough files to process
        if get_fastq_sample_number() > max_fastq_file_number:
            print('too many fastq files', get_fastq_sample_number(),
                  max_fastq_file_number, file=fp)
            break
        print(gsmid, file=fp)
        log_path = get_sra_log_path(gsmid)
        # skip if results have already been sent back
        if transfer_complete_check(gsmid):
            continue
        # skip if the fastq check-file already exists
        if fastq_check(gsmid):
            continue
        # limit the number of restarts
        if sample_queue.get_sample_restart_count(
                sample_id=gsmid) >= max_restarts:
            continue
        # limit the number of download tries
        if sample_queue.get_sample_status_count(sample_id=gsmid,
                                                info_key='SRA') > max_fails:
            continue
        # check the number of jobs pending
        cluster_status.get_jobs_in_queue()
        if cluster_status.get_pending_job_count() > max_jobs_pending:
            print(gsmid, 'too many jobs pending',
                  cluster_status.get_pending_job_count(), max_jobs_pending,
                  file=fp)
            break
        # check the number of jobs running
        if cluster_status.get_running_job_count() > max_jobs_running:
            print(gsmid, 'too many jobs running',
                  cluster_status.get_running_job_count(), max_jobs_running,
                  file=fp)
            break
        # skip if the job is already in the queue
        # (SRA download job name: {ID}_sra)
        jobname = f'{gsmid}_sra'
        if cluster_status.is_job_name_in_queue(jobname):
            continue
        cmd = f'python sra_download.py -c {configpath} -i {gsmid}'
        sbatch_path = os.path.join(
            Config.sys_config['paths']['data_collection_sbatch'],
            f'{jobname}.sbatch')
        sbatch_cmd = (f'python sbatch_header.py --cmd "{cmd}" --time 300 '
                      f'--mem 2000 --partition {partition} '
                      f'--jobname {jobname} --sbatchfile {sbatch_path} '
                      f'--log {log_path}')
        if not DEBUG:
            sbatch_cmd += ' --submit'
        subprocess.run(sbatch_cmd, shell=True)
        time.sleep(1)
    print(datetime.datetime.now(), file=fp)
    fp.close()

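# A minimal driver sketch, not part of the original module, showing one
# plausible ordering of the scheduler steps above; the real entry point and
# cadence (e.g. one cron job per step) are assumptions.
def run_pipeline_once():
    update_samples_in_local_queue()    # refresh the local queue from cistromeDB
    download_from_sra()                # stage raw reads as fastq files
    setup_and_run_chips()              # submit chips jobs (also updates job
                                       # stats and cleans up failed samples)
    check_chips_results()              # submit chips_check jobs
    transfer_to_server()               # rsync results to the home server
    transfer_to_backup_server()        # rsync results to the backup server
    update_cluster_runstats_in_local_queue()
    clean_up_after_completion()        # free disk once both transfers finish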