Example #1
0
import os
import time
import configparser

from LabQueue.qp import qp
from LabUtils.addloglevels import sethandlers

# EatOrKeepSmallRepresentatives, buildRepresentatives, build_big_bowtie,
# getAllSGBs and runChuckOfSGBs are assumed to be imported or defined elsewhere
# in the original pipeline module


def runOnSGBs(configFile):
    config = configparser.ConfigParser(interpolation=configparser.ExtendedInterpolation())
    config.read(configFile)
    run_pipeline = config['run_pipeline']
    if not os.path.exists(run_pipeline['representatives']):
        EatOrKeepSmallRepresentatives.run(configFile)
    SelectedSGBs = getAllSGBs(run_pipeline['representatives'],
                              run_pipeline['genomes_dir'],
                              run_pipeline['all_large_or_new_sgbs'])
    if not os.path.exists(run_pipeline['stage1output']):
        print ("Making representatives fasta", time.ctime())
        buildRepresentatives.run(SelectedSGBs,configFile)
        print ("Bulding Bowtie index", time.ctime())
        build_big_bowtie.run(configFile)
        with open(run_pipeline['stage1output'],'w') as donefile:
            donefile.write('Done\n')
    basedir = run_pipeline['qp_base_dir']
    score_output = run_pipeline['score_output']
    sethandlers()
    os.chdir(basedir)
    print ("Starting")
    with qp(jobname='build', q=['himem7.q']) as q:
        q.startpermanentrun()
        waiton = []
        chunk_size = 50
        # submit the selected SGBs to the queue in chunks so each job handles one batch
        for chunk_start in range(0, len(SelectedSGBs), chunk_size):
            chunkSGBs = SelectedSGBs.iloc[chunk_start:chunk_start + chunk_size]
            waiton.append(q.method(runChuckOfSGBs, (chunkSGBs, configFile)))
        q.wait(waiton)
        print("Done running on %s chunks of SGBs" % len(waiton))
    print("Done", time.ctime())
    return
import os
import mwas_annot
from LabQueue.qp import qp, fakeqp
from LabUtils.addloglevels import sethandlers

# parameters
body_site = 'Oral'  # TODO: don't forget to update majmin

output_dir = f'/net/mraid08/export/genie/LabData/Analyses/saarsh/PNP3_mwas/PNP3_mwas_{body_site.lower()}_0months_subtraction'
jobs_path = os.path.join(output_dir, 'jobs')
mwas_file_path = os.path.join(output_dir, 'mb_gwas_significant.h5')

# run
os.chdir(jobs_path)
sethandlers()

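# submit the MWAS annotation as a single queue job and block until it finishes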
with qp(jobname=f'annot_{body_site}',
        _delete_csh_withnoerr=True,
        q=['himem7.q'],
        max_r=2,
        _mem_def='5G') as q:
    q.startpermanentrun()
    snps = q.method(mwas_annot.run, (mwas_file_path, output_dir, body_site))
    q.waitforresult(snps)
Example #3
0
import os
import glob
from LabQueue.qp import qp
from LabUtils.addloglevels import sethandlers
from LabData.DataLoaders.MBSNPLoader import OralMBSNPLoader


def func():
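    # collect SGBs that already have a MAF file but are still missing the
    # annotated MAF file, then generate the annotation data only for those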
    potential_species = glob.glob('/home/saarsh/Genie/LabData/Data/MBPipeline/Analyses/MBSNP/Oral/MAF/mb_snp_maf_SGB_*_R1_S100.h5')
    potential_species = ['SGB_' + s.split('_')[-3] for s in potential_species]
    done_species = glob.glob('/home/saarsh/Genie/LabData/Data/MBPipeline/Analyses/MBSNP/Oral/MAF/mb_snp_annot_maf_SGB_*_R1_S100.h5')
    done_species = ['SGB_' + s.split('_')[-3] for s in done_species]
    species = list(set(potential_species) - set(done_species))

    ld = OralMBSNPLoader()
    ld._gen_species_set_maf_annot_data(species, min_reads_per_snp=1, min_samples_per_snp_cached=100)
    # TODO: make sure the gene annotation loader is using the OralMBLoader and not the Gut


sethandlers(file_dir='/home/saarsh/Analysis/antibiotics/jobs/')
os.chdir('/home/saarsh/Analysis/antibiotics/jobs/')

with qp(jobname='annot', _delete_csh_withnoerr=True, q=['himem7.q']) as q:
    q.startpermanentrun()
    tkttores = {}
    tkttores[0] = q.method(func)
    for k, v in tkttores.items():
        q.waitforresult(v)
import os

from LabQueue.qp import qp
from LabUtils.addloglevels import sethandlers

# _shell_command and the 'folders' list used below are assumed to come from
# elsewhere in the original script (not shown here)

jobs_dir = '/net/mraid08/export/jafar/Microbiome/Analyses/saar/NLDcopmJobs'


def func(folder):
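    # remove the folder recursively; the trailing '&' backgrounds the rm inside the shell call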
    cmd = f'rm -Rf {folder} &'
    print(cmd)
    _shell_command(cmd)

    # files = glob.glob(os.path.join(folder, '*'))
    # len_files = len(files)
    # for i_file, file in enumerate(files):
    #     if not os.path.isdir(file):
    #         print(f'file {i_file + 1}/{len_files}')
    #         _shell_command('gzip -9 ' + file)


# queue
os.chdir(jobs_dir)
sethandlers(file_dir=jobs_dir)

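# submit one deletion job per folder and wait for all of them to finish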
with qp(jobname='NLDcomp', _mem_def='10G', _tryrerun=False) as q:
    q.startpermanentrun()
    tkttores = {}

    for i_folder, folder in enumerate(folders):
        tkttores[i_folder] = q.method(func, [folder])

    for k, v in tkttores.items():
        q.waitforresult(v)
# annotate
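# prokka_cmd, prokka_version, eggnog_cmd, eggnog_version, output_dir, jobs_dir and
# shell_command, as well as the os/glob/qp/sethandlers imports, are assumed to be
# defined earlier in the original script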
def run_prokka(input_fasta):
    shell_command(prokka_cmd.format(os.path.join(os.path.dirname(input_fasta), prokka_version), input_fasta))


def run_eggnog(input_fasta):
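    # write eggNOG output into a version-named directory two levels above the input fasta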
    eggnog_dir = os.path.join(os.path.dirname(os.path.dirname(input_fasta)), eggnog_version)
    os.mkdir(eggnog_dir)
    shell_command(eggnog_cmd.format(input_fasta, os.path.join(eggnog_dir, 'eggnog')))


os.chdir(jobs_dir)
sethandlers(file_dir=jobs_dir)

# run prokka
with qp(jobname='prokka', _mem_def='4G', _trds_def=2, _tryrerun=True, _specific_nodes='plink') as q:
    q.startpermanentrun()
    tkttores = {}

    rep_paths = glob.glob(os.path.join(output_dir, '*', '*.fa'))
    for rep_path in rep_paths:
        tkttores[rep_path] = q.method(run_prokka, [rep_path])

    for k, v in tkttores.items():
        q.waitforresult(v)

# run eggnog
with qp(jobname='eggnog', _mem_def='40G', _trds_def=16, _tryrerun=True, _specific_nodes='plink') as q:
    q.startpermanentrun()
    tkttores = {}
Example #6
0
import os

import mwas_annot
from LabQueue.qp import qp, fakeqp
from LabUtils.addloglevels import sethandlers

# parameters
output_dir = '/net/mraid08/export/genie/LabData/Analyses/saarsh/anti_mwas_processed/annotations'

x_mwas_files_path = '/net/mraid08/export/genie/LabData/Analyses/saarsh/anti_mwas_raw/mb_gwas_SGB_*.h5'
y_mwas_files_path = '/net/mraid08/export/genie/LabData/Analyses/saarsh/anti_mwas_processed/*/SGB_*.h5'

mwas_file_path = os.path.join(output_dir, 'snps_codons.h5')

jobs_path = '/net/mraid08/export/jafar/Microbiome/Analyses/saar/antibiotics/jobs'

# run
os.chdir(jobs_path)
sethandlers()

with qp(jobname='annot',
        _delete_csh_withnoerr=True,
        q=['himem7.q'],
        max_r=1,
        _mem_def='20G') as q:
    q.startpermanentrun()

    # snps_unique = q.method(mwas_annot.find_unique_snps,
    #                        (x_mwas_files_path, y_mwas_files_path, output_dir, 'Pval', 0.05/26068850133))
    # q.waitforresult(snps_unique)

    snps = q.method(mwas_annot.run, (mwas_file_path, output_dir))
    q.waitforresult(snps)
import os
import glob

from LabQueue.qp import qp
from LabUtils.addloglevels import sethandlers

# color_by_coef, text_func_annotated_between and text_func_annotated_within are
# plotting helpers assumed to be defined elsewhere in the original script

run_type = 'between_species'

input_dir = os.path.join(
    '/net/mraid08/export/genie/LabData/Analyses/saarsh/anti_mwas_processed',
    run_type)
output_dir = os.path.join(
    '/net/mraid08/export/jafar/Microbiome/Analyses/saar/antibiotics/figs/new',
    run_type)
jobs_dir = '/net/mraid08/export/jafar/Microbiome/Analyses/saar/antibiotics/jobs/'

# queue
os.chdir(jobs_dir)
sethandlers(file_dir=jobs_dir)

with qp(jobname=run_type, _tryrerun=True) as q:
    q.startpermanentrun()
    tkttores = {}

    print('start sending jobs')
    for file in glob.glob(os.path.join(input_dir, 'SGB_*.h5')):  # 9710, 10068
        kwargs = {
            'mwas_fname': file,
            'out_dir': output_dir,
            'manhattan_draw_func': color_by_coef,
            'manhattan_text_func': text_func_annotated_between
            if run_type == 'between_species' else text_func_annotated_within,