Beispiel #1
0
def write_gemma_jobs(gemma_options, output_dir, logs_dir, input_dir, genotype_fn, execute):
    """Write, and optionally submit, one bsub job per phenotype file.

    Every entry returned by ``os.listdir(input_dir)`` is treated as a
    phenotype file.  For each one a job script is prepared through
    ``myos.write_bsub_string_no_rm_logs_dir``, the command built by
    ``execs_commands.gemma`` is appended to that script (preceded by an
    ``echo`` of the same command), and the bsub command is run through
    ``os.system`` when ``execute`` is true.

    Args:
        gemma_options: passed through unchanged to ``execs_commands.gemma``.
        output_dir: passed to ``execs_commands.gemma``; ensured to exist via
            ``myos.check_if_directory_exists_create_it``.
        logs_dir: passed to the bsub helper; ensured to exist the same way.
        input_dir: directory whose entries are the phenotype files.
        genotype_fn: path of the genotype file used by every run.
        execute: when true, submit each job script once it is complete.

    Returns:
        Always 0, including when a missing input file aborts the run early
        (a message is printed in that case).
    """
    qname = "serial_requeue"
    mem_usage = "1000"
    batch_size = 1  # how many runs are appended to one job script
    myos.check_if_directory_exists_create_it(output_dir)
    myos.check_if_directory_exists_create_it(logs_dir)

    runs_in_batch = 0  # runs already appended to the current job script
    bsubcmd = None
    for pheno_n in os.listdir(input_dir):
        pheno_fn = os.path.join(input_dir, pheno_n)
        # Validate before any job script is prepared, and for every run
        # rather than only the first run of a batch.
        if not (os.path.exists(pheno_fn) and os.path.exists(genotype_fn)):
            print('Oooops, One of these files to process does not exist!!! %s %s' % (pheno_fn, genotype_fn))
            return 0
        run_name = os.path.splitext(pheno_n)[0]
        if runs_in_batch == 0:
            # The first run of a batch names the job.
            bsubcmd = myos.write_bsub_string_no_rm_logs_dir(logs_dir, run_name, qname=qname, mem_usage=mem_usage, time='1438')
        # Per-run output prefix, so runs sharing one script can never write
        # to the same output prefix.
        exec_cmd = execs_commands.gemma(output_dir, gemma_options, pheno_fn, genotype_fn, run_name)
        print(exec_cmd)
        # NOTE(review): the job script path is taken to be the last
        # space-separated token of the bsub command - confirm against myos.
        job_script_fn = bsubcmd.split(' ')[-1]
        with open(job_script_fn, "a") as job_script_f:
            print(bsubcmd)
            job_script_f.write('echo \"%s\"\n' % (exec_cmd))
            job_script_f.write(exec_cmd + '\n')
        runs_in_batch += 1
        if runs_in_batch == batch_size:
            if execute:
                os.system(bsubcmd)
            runs_in_batch = 0
    # Submit a trailing, partially filled batch (only possible when
    # batch_size is raised above 1).
    if runs_in_batch > 0 and execute:
        os.system(bsubcmd)
    return 0
def write_InferDPB_jobs(output_dir,
                        logs_dir,
                        input_dir,
                        ncore='4',
                        threadnum=4,
                        execute=False):
    """Write, and optionally submit, bsub jobs over a grid of fn/fp values.

    For every (fn, fp) pair from two fixed arrays and every index ``i`` in
    1..5, a job script is prepared through
    ``myos.write_bsub_string_no_rm_logs_dir`` and the command built by
    ``execs_commands.inferDPB_fnfp`` is appended to it (preceded by an
    ``echo`` of the same command).  All input files are checked once, before
    any job script is prepared, so a missing file can no longer leave a
    partially written or partially submitted set of jobs behind.

    Args:
        output_dir: passed to ``execs_commands.inferDPB_fnfp``; ensured to
            exist via ``myos.check_if_directory_exists_create_it``.
        logs_dir: passed to the bsub helper; ensured to exist the same way.
        input_dir: directory expected to contain ``Drug_Sub``,
            ``Protein_Domain`` and, for i in 1..5, ``Drug_Protein_<i>`` and
            ``Sub_Domain_<i>``.
        ncore: forwarded to the bsub helper as ``ncores``.
        threadnum: forwarded to ``execs_commands.inferDPB_fnfp``.
        execute: when true, run each bsub command through ``os.system``.

    Returns:
        Always 0, including when a missing input file aborts the run
        (a message is printed in that case).
    """
    qname = "serial_requeue"
    mem_usage = "1000"
    myos.check_if_directory_exists_create_it(output_dir)
    myos.check_if_directory_exists_create_it(logs_dir)

    # NOTE(review): presumably false-negative / false-positive rates, going
    # by the helper name inferDPB_fnfp - confirm.
    fn_array = np.array([0.1, 0.2, 0.4, 0.8, 0.9], float)
    fp_array = np.array([0.0001, 0.001, 0.002, 0.01], float)

    # These paths do not depend on fn/fp, so build them once.
    suffixes = range(1, 6)
    d2s = os.path.join(input_dir, 'Drug_Sub')
    p2d = os.path.join(input_dir, 'Protein_Domain')
    d2p_paths = [os.path.join(input_dir, 'Drug_Protein_' + str(i)) for i in suffixes]
    s2d_paths = [os.path.join(input_dir, 'Sub_Domain_' + str(i)) for i in suffixes]

    # Fail before any job script is prepared or submitted.
    for path in [d2s, p2d] + d2p_paths + s2d_paths:
        if not os.path.exists(path):
            print("Cannot find some input files!")
            return 0

    # The counter is incremented before its first use, so numbering starts
    # at 2; kept that way so job and output names stay unchanged.
    combo_id = 1
    for fn in fn_array:
        for fp in fp_array:
            combo_id += 1
            for i, d2p, s2d_in in zip(suffixes, d2p_paths, s2d_paths):
                tag = str(combo_id) + '_' + str(i)
                outname = 'Sub_Domain_Result_' + tag
                job_name = 's2d_job' + '_' + tag
                bsubcmd = myos.write_bsub_string_no_rm_logs_dir(
                    logs_dir,
                    job_name,
                    qname=qname,
                    mem_usage=mem_usage,
                    ncores=ncore,
                    time='1438')
                exec_cmd = execs_commands.inferDPB_fnfp(
                    output_dir, fn, fp, threadnum, d2s, p2d, d2p, s2d_in,
                    outname)
                print(exec_cmd)
                # NOTE(review): the job script path is taken to be the last
                # space-separated token of the bsub command - confirm.
                job_script_fn = bsubcmd.split(' ')[-1]
                with open(job_script_fn, 'a') as job_script_f:
                    print(bsubcmd)
                    job_script_f.write('echo \"%s\"\n' % (exec_cmd))
                    job_script_f.write(exec_cmd + '\n')
                if execute:
                    os.system(bsubcmd)
    return 0
Beispiel #3
0
def write_gemma_jobs(gemma_options, output_dir, logs_dir, input_dir,
                     genotype_fn, execute):
    """Write, and optionally submit, one bsub job per phenotype file.

    Each entry of ``os.listdir(input_dir)`` is treated as a phenotype file.
    A job script is prepared for it with
    ``myos.write_bsub_string_no_rm_logs_dir``; the command returned by
    ``execs_commands.gemma`` is then appended to the script, preceded by an
    ``echo`` of that command.  When ``execute`` is true the bsub command is
    run through ``os.system``.

    Args:
        gemma_options: forwarded unchanged to ``execs_commands.gemma``.
        output_dir: forwarded to ``execs_commands.gemma``; ensured to exist
            via ``myos.check_if_directory_exists_create_it``.
        logs_dir: forwarded to the bsub helper; ensured to exist likewise.
        input_dir: directory whose entries are the phenotype files.
        genotype_fn: path of the genotype file used by every run.
        execute: when true, submit each job script once it is complete.

    Returns:
        Always 0; a missing input file prints a message and stops early.
    """
    qname = "serial_requeue"
    mem_usage = "1000"
    batch_size = 1  # how many runs are appended to one job script
    myos.check_if_directory_exists_create_it(output_dir)
    myos.check_if_directory_exists_create_it(logs_dir)

    pending = 0  # runs appended to the current script, not yet submitted
    bsubcmd = None
    for pheno_n in os.listdir(input_dir):
        pheno_fn = os.path.join(input_dir, pheno_n)
        # Check inputs for every run and before a job script is prepared,
        # not only for the first run of a batch.
        inputs_ok = os.path.exists(pheno_fn) and os.path.exists(genotype_fn)
        if not inputs_ok:
            print('Oooops, One of these files to process does not exist!!! %s %s' % (
                pheno_fn, genotype_fn))
            return 0
        stem = os.path.splitext(pheno_n)[0]
        if pending == 0:
            # A new batch starts here; its first run names the job.
            bsubcmd = myos.write_bsub_string_no_rm_logs_dir(
                logs_dir,
                stem,
                qname=qname,
                mem_usage=mem_usage,
                time='1438')
        # Use the run's own stem as output prefix so that runs sharing a
        # job script cannot end up with the same output prefix.
        exec_cmd = execs_commands.gemma(output_dir, gemma_options, pheno_fn,
                                        genotype_fn, stem)
        print(exec_cmd)
        # NOTE(review): assumes the script path is the last space-separated
        # token of the bsub command - confirm against myos.
        job_script_fn = bsubcmd.split(' ')[-1]
        with open(job_script_fn, "a") as job_script_f:
            print(bsubcmd)
            job_script_f.write('echo \"%s\"\n' % (exec_cmd))
            job_script_f.write(exec_cmd + '\n')
        pending += 1
        if pending == batch_size:
            if execute:
                os.system(bsubcmd)
            pending = 0
    # A last, partially filled batch still needs submitting (only reachable
    # when batch_size is larger than 1).
    if pending > 0 and execute:
        os.system(bsubcmd)
    return 0
Beispiel #4
0
def write_InferDPB_jobs(output_dir, logs_dir, input_dir, ncore="4", threadnum=4, execute=False):
    """Write, and optionally submit, bsub jobs over a grid of fn/fp values.

    One job is produced per (fn, fp) pair from two fixed arrays and per
    index ``i`` in 1..5.  Each job script is prepared by
    ``myos.write_bsub_string_no_rm_logs_dir`` and receives the command built
    by ``execs_commands.inferDPB_fnfp``, preceded by an ``echo`` of it.
    Input files are verified once up front, so a missing file stops the
    function before any job script is prepared or submitted.

    Args:
        output_dir: forwarded to ``execs_commands.inferDPB_fnfp``; ensured
            to exist via ``myos.check_if_directory_exists_create_it``.
        logs_dir: forwarded to the bsub helper; ensured to exist likewise.
        input_dir: directory expected to hold ``Drug_Sub``,
            ``Protein_Domain`` and, for i in 1..5, ``Drug_Protein_<i>`` and
            ``Sub_Domain_<i>``.
        ncore: forwarded to the bsub helper as ``ncores``.
        threadnum: forwarded to ``execs_commands.inferDPB_fnfp``.
        execute: when true, run each bsub command through ``os.system``.

    Returns:
        Always 0; a missing input file prints a message and stops early.
    """
    qname = "serial_requeue"
    mem_usage = "1000"
    myos.check_if_directory_exists_create_it(output_dir)
    myos.check_if_directory_exists_create_it(logs_dir)

    # NOTE(review): presumably false-negative / false-positive rates, going
    # by the helper name inferDPB_fnfp - confirm.
    fn_array = np.array([0.1, 0.2, 0.4, 0.8, 0.9], float)
    fp_array = np.array([0.0001, 0.001, 0.002, 0.01], float)

    # Input paths are independent of fn/fp: compute them a single time.
    indices = range(1, 6)
    d2s = os.path.join(input_dir, "Drug_Sub")
    p2d = os.path.join(input_dir, "Protein_Domain")
    per_index_inputs = [
        (i, os.path.join(input_dir, "Drug_Protein_" + str(i)), os.path.join(input_dir, "Sub_Domain_" + str(i)))
        for i in indices
    ]

    # Verify everything before the first job script is prepared.
    required = [d2s, p2d]
    for _, d2p, s2d_in in per_index_inputs:
        required.append(d2p)
        required.append(s2d_in)
    if not all(os.path.exists(path) for path in required):
        print("Cannot find some input files!")
        return 0

    # Incremented before first use, so the first pair is numbered 2; kept
    # so that existing job and output names do not change.
    pair_no = 1
    for fn in fn_array:
        for fp in fp_array:
            pair_no += 1
            for i, d2p, s2d_in in per_index_inputs:
                outname = "Sub_Domain_Result_" + str(pair_no) + "_" + str(i)
                job_name = "s2d_job" + "_" + str(pair_no) + "_" + str(i)
                bsubcmd = myos.write_bsub_string_no_rm_logs_dir(
                    logs_dir, job_name, qname=qname, mem_usage=mem_usage, ncores=ncore, time="1438"
                )
                exec_cmd = execs_commands.inferDPB_fnfp(output_dir, fn, fp, threadnum, d2s, p2d, d2p, s2d_in, outname)
                print(exec_cmd)
                # NOTE(review): assumes the script path is the last
                # space-separated token of the bsub command - confirm.
                job_script_fn = bsubcmd.split(" ")[-1]
                with open(job_script_fn, "a") as job_script_f:
                    print(bsubcmd)
                    job_script_f.write('echo "%s"\n' % (exec_cmd))
                    job_script_f.write(exec_cmd + "\n")
                if execute:
                    os.system(bsubcmd)
    return 0
Beispiel #5
0
def write_DECODE_jobs(logs_dir,
                      input_dir,
                      gene_residual_file,
                      tissuenm,
                      outdir,
                      splitnum=20,
                      execute=False):
    """Write, and optionally submit, ``splitnum`` bsub jobs for gtex_decode.

    The value returned by ``myos.wccount`` for the ``genelocsnp`` file in
    ``input_dir`` is divided by ``splitinteger`` into ``splitnum`` entries.
    For each entry a job script is prepared through
    ``myos.write_bsub_string_no_rm_logs_dir`` and the command built by
    ``execs_commands.gtex_decode`` is appended to it, preceded by an
    ``echo`` of the same command.

    Args:
        logs_dir: passed to the bsub helper; ensured to exist via
            ``myos.check_if_directory_exists_create_it``.
        input_dir: directory containing the ``genelocsnp`` file; a trailing
            path separator is no longer required.
        gene_residual_file: input file shared by every job; must exist.
        tissuenm: forwarded unchanged to ``execs_commands.gtex_decode``.
        outdir: forwarded to ``execs_commands.gtex_decode``; ensured to
            exist the same way as ``logs_dir``.
        splitnum: number of jobs to create.
        execute: when true, run each bsub command through ``os.system``.

    Returns:
        Always 0; a missing ``gene_residual_file`` prints a message and
        stops before any job script is prepared.
    """
    qname = "serial_requeue"
    mem_usage = "25000"
    myos.check_if_directory_exists_create_it(logs_dir)
    myos.check_if_directory_exists_create_it(outdir)
    # os.path.join rather than string concatenation, so input_dir works
    # with or without a trailing separator.
    genetotalnum = myos.wccount(os.path.join(input_dir, "genelocsnp"))
    taskseq = splitinteger(genetotalnum, splitnum)

    # The same file is used by every job: check it once, before the first
    # job script is prepared, instead of on every iteration.
    if not os.path.exists(gene_residual_file):
        print("Cannot find some input files!")
        return 0

    for i in range(0, splitnum):
        # NOTE(review): presumably the first/last gene index of this chunk -
        # confirm against splitinteger.
        chunk_first = taskseq[i][0]
        chunk_last = taskseq[i][1]
        job_name = 'gtex_decode' + '_' + str(chunk_first) + '_' + str(
            chunk_last)
        bsubcmd = myos.write_bsub_string_no_rm_logs_dir(logs_dir,
                                                        job_name,
                                                        qname=qname,
                                                        mem_usage=mem_usage,
                                                        time='300')
        exec_cmd = execs_commands.gtex_decode(gene_residual_file,
                                              chunk_first, chunk_last,
                                              tissuenm, outdir)
        print(exec_cmd)
        # NOTE(review): assumes the script path is the last space-separated
        # token of the bsub command - confirm against myos.
        job_script_fn = bsubcmd.split(' ')[-1]
        with open(job_script_fn, 'a') as job_script_f:
            print(bsubcmd)
            job_script_f.write('echo \"%s\"\n' % (exec_cmd))
            job_script_f.write(exec_cmd + '\n')
        if execute:
            os.system(bsubcmd)
    return 0