def update_cluster_runstats_in_local_queue():
    configpath = Config.configpath
    sample_queue = requests_from_cistromeDB.SampleQueue(configpath)
    sample_queue.read_local_queue()

    cluster_status = cluster_stats.ClusterStats(configpath)
    cluster_status.get_account_info()

    print(cluster_status.account_info)
    # update record of sbatch jobs

    job_status = match_sbatch_history(
        suffix='chips',
        jobs_name=cluster_status.account_info['name'],
        jobs_status=cluster_status.account_info['status'],
        jobs_id=cluster_status.account_info['job_id'])

    print(job_status)
    for sampleid, job_type_status in job_status.items():
        sample_queue.set_sample_info(sample_id=sampleid,
                                     info_key=job_type_status['type'],
                                     info_val=job_type_status['status'])

    job_status = match_sbatch_history(
        suffix='chips_check',
        jobs_name=cluster_status.account_info['name'],
        jobs_status=cluster_status.account_info['status'],
        jobs_id=cluster_status.account_info['job_id'])
    for sampleid, job_type_status in job_status.items():
        sample_queue.set_sample_info(sample_id=sampleid,
                                     info_key=job_type_status['type'],
                                     info_val=job_type_status['status'])

    print(sample_queue)
    with open('test.json', 'w') as fp:
        json.dump(sample_queue.local_samples, fp)

    # debug: reports the fail count for the last sample seen in the loop above
    print(sample_queue.get_sample_fail_count(sample_id=sampleid, info_key='chips'))
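
# A minimal sketch of what match_sbatch_history might look like; the real helper is not
# part of this excerpt, so the name-parsing logic and the returned dict layout below are
# assumptions inferred from the calls above ({gsmid}_{suffix} job names, 'type'/'status' keys).
def match_sbatch_history_sketch(suffix, jobs_name, jobs_status, jobs_id):
    """Map sacct entries named {gsmid}_{suffix} to {gsmid: {'type': ..., 'status': ...}}."""
    # jobs_id is accepted only to mirror the call signature used above
    matched = {}
    for name, status in zip(jobs_name, jobs_status):
        # exact suffix match, so 'chips' does not also pick up '{gsmid}_chips_check' jobs
        if not name.endswith(f'_{suffix}'):
            continue
        gsmid = name[:-(len(suffix) + 1)]
        # later accounting entries overwrite earlier ones, keeping the most recent status
        matched[gsmid] = {'type': suffix.upper(), 'status': status}
    return matched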
def clean_up_after_completion():

    configpath = Config.configpath
    sample_queue = requests_from_cistromeDB.SampleQueue(configpath)
    sample_queue.read_local_queue()
    samples_to_process = sample_queue.get_local_queue()

    for gsmid, sample_info in samples_to_process.items():

        # guard against an empty id before building paths: joining '' would point the
        # paths at the data collection root, which must never be deleted
        if gsmid == '':
            gsmid = 'missing_id_do_not_delete_the_path'

        sample_path = os.path.join(
            Config.sys_config['paths']['data_collection_runs'], gsmid)
        cistrome_path = os.path.join(
            sample_path, Config.sys_config['paths']['cistrome_result'])

        # check transfer is complete and results have not yet been deleted
        if (transfer_complete_check(gsmid) == True
                and transfer_to_backup_complete_check(gsmid) == True
                and os.path.exists(cistrome_path) == True):

            delete_sra_files(gsmid)
            delete_fastq_files(gsmid)
            delete_sbatch_files(gsmid)
            delete_result_files(gsmid, complete=True)

    return
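
# A minimal sketch of the check-file pattern the *_complete_check helpers above appear to
# rely on; the helpers themselves are not part of this excerpt, so the marker filename
# used here is a guess, not the project's actual convention.
def transfer_complete_check_sketch(gsmid):
    """Return True if a (hypothetical) transfer-complete marker exists for this sample."""
    sample_path = os.path.join(
        Config.sys_config['paths']['data_collection_runs'], gsmid)
    marker = os.path.join(sample_path, f'transfer_complete_{gsmid}.txt')  # assumed filename
    return os.path.exists(marker)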
def clean_up_failed_samples():
    configpath = Config.configpath
    partition = Config.sys_config['process_server']['partition']
    max_fails = int(Config.sys_config['process_server']['max_fails'])
    cluster_status = cluster_stats.ClusterStats(configpath)

    sample_queue = requests_from_cistromeDB.SampleQueue(configpath)
    sample_queue.read_local_queue()
    samples_to_process = sample_queue.get_local_queue()

    for gsmid, sample_info in samples_to_process.items():
        # NOTE: for CHIPS_CHECK we count how many times it has run rather than how many
        # times it has failed; if it keeps running there is a problem with the sample
        # (previously: sample_queue.get_sample_fail_count(sample_id=gsmid, info_key='CHIPS_CHECK') >= max_fails)
        if (sample_queue.get_sample_fail_count(sample_id=gsmid, info_key='SRA')
                >= max_fails
                or sample_queue.get_sample_fail_count(
                    sample_id=gsmid, info_key='CHIPS') >= max_fails
                or sample_queue.get_sample_status_count(
                    sample_id=gsmid, info_key='CHIPS_CHECK') >= max_fails):

            print(f'cleaning up failed sample {gsmid}')
            sample_queue.clear_sample_info(sample_id=gsmid, info_key='SRA')
            sample_queue.clear_sample_info(sample_id=gsmid, info_key='CHIPS')
            sample_queue.clear_sample_info(sample_id=gsmid,
                                           info_key='CHIPS_CHECK')
            delete_sbatch_files(gsmid)
            delete_sra_files(gsmid)
            delete_fastq_files(gsmid)
            delete_result_files(gsmid, complete=False)
            sample_queue.increment_sample_restart_count(sample_id=gsmid)

    sample_queue.write_local_queue()
    return
class SampleQueueTestCase(unittest.TestCase):
    # NOTE: the class name and unittest.TestCase base are assumed; only the setUp method
    # below appeared in the original excerpt.
    def setUp(self):
        sample_queue_path = './chips_test_dirs/cistrome_pipeline.conf'
        sample_json_file  = './chips_test_dirs/test_collection.json'

        self.sample_queue = requests_from_cistromeDB.SampleQueue(sample_queue_path)
        with open(sample_json_file,'r') as fp:
            self.sample_queue.requested_samples = json.load(fp)
            print(self.sample_queue.requested_samples)
        self.sample_queue.read_local_queue()
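
    # Hedged example (not part of the original test suite): a minimal smoke test that
    # could follow the setUp above, checking that the local queue loads into a dict.
    def test_local_queue_loads(self):
        self.assertIsInstance(self.sample_queue.get_local_queue(), dict)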
def check_chips_results():

    configpath = Config.configpath
    partition = Config.sys_config['process_server']['partition']
    cluster_status = cluster_stats.ClusterStats(configpath)

    sample_queue = requests_from_cistromeDB.SampleQueue(configpath)
    sample_queue.read_local_queue()
    samples_to_process = sample_queue.get_local_queue()

    for gsmid, sample_info in samples_to_process.items():

        sample_path = os.path.join(
            Config.sys_config['paths']['data_collection_runs'], gsmid)
        log_path = os.path.join(sample_path, f'chips_check_log_{gsmid}.txt')
        chips_yaml = os.path.join(sample_path, 'config.yaml')
        jobname = f'{gsmid}_chips_check'
        sbatch_path = os.path.join(
            Config.sys_config['paths']['data_collection_sbatch'],
            f'{jobname}.sbatch')

        print(gsmid)
        # check results have not been sent back already
        if transfer_complete_check(gsmid) == True:
            continue

        # check chips run is complete
        if chips_complete_check(gsmid) == False:
            continue

        # check chips run has not been checked already
        if chips_check_complete_check(gsmid) == True:
            continue

        # check number of jobs in queue
        cluster_status.get_jobs_in_queue()
        if (cluster_status.get_pending_job_count() > int(
                Config.sys_config['process_server']['max_jobs_pending'])):
            break

        # check job is not already in queue
        if cluster_status.is_job_name_in_queue(
                jobname) == True:  # chips check job name: {ID}_chips_check
            continue

        cmd = f'python check_chips.py -c {configpath} -i {gsmid}'

        if DEBUG:
            sbatch_cmd = f'python sbatch_header.py --cmd "{cmd}" --time 480 --mem 2000 --partition {partition} --jobname {jobname} --sbatchfile {sbatch_path} --log {log_path}'
        else:
            sbatch_cmd = f'python sbatch_header.py --cmd "{cmd}" --time 480 --mem 2000 --partition {partition} --jobname {jobname} --sbatchfile {sbatch_path} --log {log_path} --submit'

        subprocess.run(sbatch_cmd, shell=True)
        time.sleep(1)
        print(datetime.datetime.now())

    return
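
# Hedged illustration of the sbatch_header.py call used above; the script itself is not
# shown here, but its --time/--mem/--partition/--jobname/--sbatchfile/--log flags suggest
# it writes a Slurm batch script roughly like the following before (with --submit)
# handing it to sbatch:
#
#   #!/bin/bash
#   #SBATCH --job-name=GSMxxxxxxx_chips_check
#   #SBATCH --partition=<partition>
#   #SBATCH --time=480
#   #SBATCH --mem=2000
#   #SBATCH --output=<log_path>
#   python check_chips.py -c <configpath> -i GSMxxxxxxx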
def update_cluster_runstats_in_local_queue():
    configpath = Config.configpath
    sample_queue = requests_from_cistromeDB.SampleQueue(configpath)
    sample_queue.read_local_queue()

    cluster_status = cluster_stats.ClusterStats(configpath)
    cluster_status.get_account_info()

    # update record of sbatch jobs
    job_status = match_sbatch_history(
        suffix='sra',
        jobs_name=cluster_status.account_info['name'],
        jobs_status=cluster_status.account_info['status'],
        jobs_id=cluster_status.account_info['job_id'])
    for sampleid, job_type_status in job_status.items():
        sample_queue.set_sample_info(sample_id=sampleid,
                                     info_key=job_type_status['type'],
                                     info_val=job_type_status['status'])

    job_status = match_sbatch_history(
        suffix='chips',
        jobs_name=cluster_status.account_info['name'],
        jobs_status=cluster_status.account_info['status'],
        jobs_id=cluster_status.account_info['job_id'])
    for sampleid, job_type_status in job_status.items():
        sample_queue.set_sample_info(sample_id=sampleid,
                                     info_key=job_type_status['type'],
                                     info_val=job_type_status['status'])

    job_status = match_sbatch_history(
        suffix='chips_check',
        jobs_name=cluster_status.account_info['name'],
        jobs_status=cluster_status.account_info['status'],
        jobs_id=cluster_status.account_info['job_id'])
    for sampleid, job_type_status in job_status.items():
        sample_queue.set_sample_info(sample_id=sampleid,
                                     info_key=job_type_status['type'],
                                     info_val=job_type_status['status'])

    sample_queue.write_local_queue()
def update_samples_in_local_queue():
    configpath = Config.configpath
    sample_queue = requests_from_cistromeDB.SampleQueue(configpath)
    sample_queue.update_local_queue()
def transfer_to_backup_server():

    configpath = Config.configpath
    server = 'backup_server'
    cluster_status = cluster_stats.ClusterStats(configpath)
    sample_queue = requests_from_cistromeDB.SampleQueue(configpath)
    sample_queue.read_local_queue()
    samples_to_process = sample_queue.get_local_queue()
    max_backup_rsync = int(
        Config.sys_config['process_server']['max_jobs_rsync_backup'])
    max_jobs_running = int(
        Config.sys_config['process_server']['max_jobs_running'])
    max_jobs_pending = int(
        Config.sys_config['process_server']['max_jobs_pending'])

    partition = Config.sys_config['process_server']['partition']

    n_fails = 0  # keep track of failures (currently unused; the tracking block below is commented out)

    fp = open('schedule_rsync_backup_log.txt', 'a')
    print('rsync backup running:', datetime.datetime.now(), file=fp)

    for gsmid, sample_info in samples_to_process.items():
        if chips_check_complete_check(
                gsmid) == True and transfer_to_backup_complete_check(
                    gsmid) == False:

            # check number of jobs in queue
            cluster_status.get_jobs_in_queue()
            if cluster_status.get_pending_job_count() >= max_jobs_pending:
                break

            if cluster_status.get_running_job_count() >= max_jobs_running:
                break

            # TODO: investigate how more than max_backup_rsync jobs can end up running (Slurm failing to report jobs?)
            n_backup_rsync_jobs = len([
                jobname
                for jobname in cluster_status.list_job_names_in_queue()
                if '_backup_rsync' in jobname
            ])
            if n_backup_rsync_jobs >= max_backup_rsync:
                break

            jobname = f'{gsmid}_backup_rsync'
            if cluster_status.is_job_name_in_queue(jobname) == True:
                continue

            sample_path = os.path.join(
                Config.sys_config['paths']['data_collection_runs'], gsmid)
            log_path = os.path.join(sample_path,
                                    f'backup_rsync_log_{gsmid}.txt')
            sbatch_path = os.path.join(
                Config.sys_config['paths']['data_collection_sbatch'],
                f'{jobname}.sbatch')
            cmd = f'python file_transfer_to_server.py -c {configpath} -i {gsmid} -s {server}'

            if not DEBUG:
                print(f'rsync {gsmid}:', datetime.datetime.now(), file=fp)
                #subprocess.check_call(cmd,shell=True)
                #print(f'rsync {gsmid} complete:',datetime.datetime.now(),file=fp)
                sbatch_cmd = f'python sbatch_header.py --cmd "{cmd}" --time 3600 --mem 1000 --partition {partition} --jobname {jobname} --sbatchfile {sbatch_path} --log {log_path} --submit'
                subprocess.run(sbatch_cmd, shell=True)

            # track failure to backup
            #if transfer_to_backup_complete_check(gsmid) == False:
            #    n_fails += 1
            #    print(f'rsync {gsmid} failure {n_fails}:',datetime.datetime.now(),file=fp)
            # reduce fail count on success
            #elif n_fails > 0:
            #    n_fails -= 1
            #if n_fails >= MAX_BACKUP_FAILURES:
            #    break

            time.sleep(10)
            print(datetime.datetime.now())
            fp.flush()

    fp.close()
    return
def transfer_to_server():

    configpath = Config.configpath
    server = 'home_server'
    cluster_status = cluster_stats.ClusterStats(configpath)
    sample_queue = requests_from_cistromeDB.SampleQueue(configpath)
    sample_queue.read_local_queue()
    samples_to_process = sample_queue.get_local_queue()
    max_data_rsync = int(
        Config.sys_config['process_server']['max_jobs_rsync_data'])
    max_jobs_running = int(
        Config.sys_config['process_server']['max_jobs_running'])
    max_jobs_pending = int(
        Config.sys_config['process_server']['max_jobs_pending'])

    partition = Config.sys_config['process_server']['partition']

    fp = open('schedule_rsync_data_log.txt', 'a')
    print('rsync data running:', datetime.datetime.now(), file=fp)

    for gsmid, sample_info in samples_to_process.items():
        if chips_check_complete_check(
                gsmid) == True and transfer_complete_check(gsmid) == False:

            # check number of jobs in queue
            cluster_status.get_jobs_in_queue()
            if cluster_status.get_pending_job_count() >= max_jobs_pending:
                break

            if cluster_status.get_running_job_count() >= max_jobs_running:
                break

            n_rsync_jobs = len([
                jobname
                for jobname in cluster_status.list_job_names_in_queue()
                if '_data_rsync' in jobname
            ])
            if n_rsync_jobs >= max_data_rsync:
                break

            jobname = f'{gsmid}_data_rsync'
            if cluster_status.is_job_name_in_queue(jobname) == True:
                continue

            sample_path = os.path.join(
                Config.sys_config['paths']['data_collection_runs'], gsmid)
            log_path = os.path.join(sample_path, f'data_rsync_log_{gsmid}.txt')
            sbatch_path = os.path.join(
                Config.sys_config['paths']['data_collection_sbatch'],
                f'{jobname}.sbatch')
            cmd = f'python file_transfer_to_server.py -c {configpath} -i {gsmid} -s {server}'

            if not DEBUG:
                print(f'rsync {gsmid}:', datetime.datetime.now(), file=fp)
                sbatch_cmd = f'python sbatch_header.py --cmd "{cmd}" --time 3600 --mem 1000 --partition {partition} --jobname {jobname} --sbatchfile {sbatch_path} --log {log_path} --submit'
                subprocess.run(sbatch_cmd, shell=True)
            else:
                print(cmd)

            time.sleep(10)
            print(datetime.datetime.now())
            fp.flush()

    fp.close()
    return
def setup_and_run_chips():

    fp = open('schedule_chips_log.txt', 'a')
    print('job stat update running:', datetime.datetime.now(), file=fp)
    update_cluster_runstats_in_local_queue()
    print('clean up failed samples:', datetime.datetime.now(), file=fp)
    clean_up_failed_samples()

    print('chips job submission running:', datetime.datetime.now(), file=fp)
    # processing differs between sample types; the lookup below currently maps each
    # type to itself, presumably as a hook for aliasing request names to chips types
    sampletype_lookup = {
        'dnase': 'dnase',
        'atac': 'atac',
        'tf': 'tf',
        'h3k27ac': 'h3k27ac',
        'h3k4me3': 'h3k4me3'
    }

    max_jobs_pending = int(
        Config.sys_config['process_server']['max_jobs_pending'])
    max_restarts = int(Config.sys_config['process_server']['max_restarts'])
    configpath = Config.configpath
    partition = Config.sys_config['process_server']['partition']
    cluster_status = cluster_stats.ClusterStats(configpath)

    sample_queue = requests_from_cistromeDB.SampleQueue(configpath)
    sample_queue.read_local_queue()
    samples_to_process = sample_queue.get_local_queue()

    # TODO: confirm that the sample-type names used to configure chips match the types in the sample request file
    for gsmid, sample_info in samples_to_process.items():

        #print('chips loop',gsmid,file=fp)
        if sample_queue.get_sample_restart_count(
                sample_id=gsmid) >= max_restarts:
            continue

        # check fastq check-file exists
        if fastq_check(gsmid) == False:
            continue

        # check chips run is not complete
        if chips_complete_check(gsmid) == True:
            continue

        # check results have not been sent back already
        if transfer_complete_check(gsmid) == True:
            continue

        # check number of jobs in queue
        cluster_status.get_jobs_in_queue()
        if cluster_status.get_pending_job_count() > max_jobs_pending:
            print(gsmid,
                  'too many jobs pending: break chips submission',
                  file=fp)
            break

        # check job is not already in queue
        if cluster_status.is_job_name_in_queue(
                f'{gsmid}_chips') == True:  # chips job name: {ID}_chips
            continue

        species = sample_info['species']
        sampletype = sample_info['sampletype']
        if sample_info['broad'].lower() == 'true':
            broad = '--broad'
        else:
            broad = ''

        # TODO: move this lookup to a more appropriate place
        sampletype = sampletype_lookup[sampletype.lower()]
        cmd = f'python chips_job_submission.py -c {configpath} --gsm {gsmid} --species {species} --sampletype {sampletype} {broad} --submit'
        if DEBUG:
            print(gsmid)
            print(sample_info)
            print(cmd)
        else:
            subprocess.run(cmd, shell=True)
            print(gsmid, datetime.datetime.now(), file=fp)
            time.sleep(1)

    fp.close()
def download_from_sra():

    configpath = Config.configpath
    config = Config.sys_config
    partition = config['process_server']['partition']
    cluster_status = cluster_stats.ClusterStats(configpath)
    max_fastq_file_number = int(
        Config.sys_config['process_server']['max_fastq_file_number'])
    max_fails = int(Config.sys_config['process_server']['max_fails'])
    max_restarts = int(Config.sys_config['process_server']['max_restarts'])

    # TODO scratch = cluster_status.get_scratch_use()
    # check disk space availability
    #if (scratch['quota'] - scratch['used']) < float(config['process_server']['min_disk_space_avail']):
    #    return

    sample_queue = requests_from_cistromeDB.SampleQueue(configpath)
    sample_queue.read_local_queue()
    samples_to_process = sample_queue.get_local_queue()

    fp = open('schedule_sra_log.txt', 'a')

    for gsmid, sample_info in samples_to_process.items():

        # don't download if there are already enough files to process
        if get_fastq_sample_number() > max_fastq_file_number:
            print('too many fastq files',
                  get_fastq_sample_number(),
                  max_fastq_file_number,
                  file=fp)
            break

        print(gsmid, file=fp)

        log_path = get_sra_log_path(gsmid)

        # check results have not been sent back already
        if transfer_complete_check(gsmid) == True:
            continue

        # check fastq check-file does not exist
        if fastq_check(gsmid) == True:
            continue

        # limit number of restarts
        if sample_queue.get_sample_restart_count(
                sample_id=gsmid) >= max_restarts:
            continue

        # limit number of download tries
        if sample_queue.get_sample_status_count(sample_id=gsmid,
                                                info_key='SRA') > max_fails:
            continue

        # check number of jobs pending
        cluster_status.get_jobs_in_queue()
        if (cluster_status.get_pending_job_count() > int(
                Config.sys_config['process_server']['max_jobs_pending'])):
            print(gsmid,
                  'too many jobs pending',
                  cluster_status.get_pending_job_count(),
                  int(Config.sys_config['process_server']['max_jobs_pending']),
                  file=fp)
            break

        # check number of jobs running
        if (cluster_status.get_running_job_count() > int(
                Config.sys_config['process_server']['max_jobs_running'])):
            print(gsmid,
                  'too many jobs running',
                  cluster_status.get_running_job_count(),
                  int(Config.sys_config['process_server']['max_jobs_running']),
                  file=fp)
            break

        # check job is not already in queue; SRA download job name: {ID}_sra
        jobname = f'{gsmid}_sra'
        if cluster_status.is_job_name_in_queue(jobname) == True:
            continue

        cmd = f'python sra_download.py -c {configpath} -i {gsmid}'

        sbatch_path = os.path.join(
            Config.sys_config['paths']['data_collection_sbatch'],
            f'{jobname}.sbatch')

        if DEBUG:
            sbatch_cmd = f'python sbatch_header.py --cmd "{cmd}" --time 300 --mem 2000 --partition {partition} --jobname {jobname} --sbatchfile {sbatch_path} --log {log_path}'
        else:
            sbatch_cmd = f'python sbatch_header.py --cmd "{cmd}" --time 300 --mem 2000 --partition {partition} --jobname {jobname} --sbatchfile {sbatch_path} --log {log_path} --submit'

        subprocess.run(sbatch_cmd, shell=True)
        time.sleep(1)
        print(datetime.datetime.now(), file=fp)
    fp.close()
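
# A hedged usage sketch; the real driver/cron wrapper is not part of this excerpt, so the
# ordering below is an assumption based on how the stages depend on one another. Note that
# setup_and_run_chips() already calls update_cluster_runstats_in_local_queue() and
# clean_up_failed_samples() itself.
def run_pipeline_once_sketch():
    update_samples_in_local_queue()   # refresh the local queue from cistromeDB requests
    download_from_sra()               # stage 1: fetch raw reads for queued samples
    setup_and_run_chips()             # stage 2: submit chips jobs (also updates run stats)
    check_chips_results()             # stage 3: validate chips output via chips_check jobs
    transfer_to_server()              # stage 4: rsync results to the home server
    transfer_to_backup_server()       # stage 5: rsync results to the backup server
    clean_up_after_completion()       # stage 6: free disk once both transfers are verified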