def runOnSGBs(configFile):
    """Run the SGB pipeline described by *configFile*.

    Ensures the representatives file exists, builds the combined
    representatives fasta and bowtie index once (guarded by the stage-1
    marker file), then fans the selected SGBs out to the queue in
    fixed-size chunks and waits for every chunk job to finish.

    :param configFile: path to an INI file with a [run_pipeline] section,
        read with configparser ExtendedInterpolation.
    :return: None
    """
    config = configparser.ConfigParser(interpolation=configparser.ExtendedInterpolation())
    config.read(configFile)
    run_pipeline = config['run_pipeline']

    # Stage 0: create the representatives file if it is missing.
    if not os.path.exists(run_pipeline['representatives']):
        EatOrKeepSmallRepresentatives.run(configFile)

    SelectedSGBs = getAllSGBs(run_pipeline['representatives'],
                              run_pipeline['genomes_dir'],
                              run_pipeline['all_large_or_new_sgbs'])

    # Stage 1: build fasta + bowtie index exactly once; the marker file
    # lets reruns skip this expensive step.
    if not os.path.exists(run_pipeline['stage1output']):
        print("Making representatives fasta", time.ctime())
        buildRepresentatives.run(SelectedSGBs, configFile)
        print("Building Bowtie index", time.ctime())
        build_big_bowtie.run(configFile)
        with open(run_pipeline['stage1output'], 'w') as donefile:
            donefile.write('Done\n')

    basedir = run_pipeline['qp_base_dir']
    sethandlers()
    os.chdir(basedir)
    print("Starting")
    with qp(jobname='build', q=['himem7.q']) as q:
        q.startpermanentrun()
        waiton = []
        chunksize = 50
        # One queue job per chunk. The original kept a redundant `count`
        # counter alongside an unused loop variable; the range start is the
        # only index needed. .loc slicing is end-inclusive, hence the -1
        # (assumes SelectedSGBs has a 0-based RangeIndex — TODO confirm).
        for start in range(0, len(SelectedSGBs), chunksize):
            chunkSGBs = SelectedSGBs.loc[start:start + chunksize - 1]
            waiton.append(q.method(runChuckOfSGBs, (chunkSGBs, configFile)))
        q.wait(waiton)
        # len(waiton) is the number of chunks, not SGBs — report the SGB count.
        print("Done running on %s SGBs" % len(SelectedSGBs))
    print("Done", time.ctime())
    return
import os

import mwas_annot
from LabQueue.qp import qp, fakeqp
from LabUtils.addloglevels import sethandlers

# ---- parameters ------------------------------------------------------------
body_site = 'Oral'  # TODO: don't forget to update majmin
output_dir = f'/net/mraid08/export/genie/LabData/Analyses/saarsh/PNP3_mwas/PNP3_mwas_{body_site.lower()}_0months_subtraction'
jobs_path = os.path.join(output_dir, 'jobs')
mwas_file_path = os.path.join(output_dir, f'mb_gwas_significant.h5')

# ---- run -------------------------------------------------------------------
# Logging handlers write into the jobs directory, so move there first.
os.chdir(jobs_path)
sethandlers()

# Submit the annotation as a single queue job and block until it finishes.
with qp(jobname=f'annot_{body_site}', _delete_csh_withnoerr=True,
        q=['himem7.q'], max_r=2, _mem_def='5G') as q:
    q.startpermanentrun()
    snps = q.method(mwas_annot.run, (mwas_file_path, output_dir, body_site))
    q.waitforresult(snps)
import os
import glob

from LabQueue.qp import qp
from LabUtils.addloglevels import sethandlers
from LabData.DataLoaders.MBSNPLoader import OralMBSNPLoader


def func():
    """Generate MAF annotation caches for every oral SGB that has a MAF
    file but no annotation file yet."""
    maf_glob = '/home/saarsh/Genie/LabData/Data/MBPipeline/Analyses/MBSNP/Oral/MAF/mb_snp_maf_SGB_*_R1_S100.h5'
    annot_glob = '/home/saarsh/Genie/LabData/Data/MBPipeline/Analyses/MBSNP/Oral/MAF/mb_snp_annot_maf_SGB_*_R1_S100.h5'

    def sgb_ids(paths):
        # The SGB id is the third-from-last '_'-separated token of the name.
        return {'SGB_' + p.split('_')[-3] for p in paths}

    pending = sgb_ids(glob.glob(maf_glob)) - sgb_ids(glob.glob(annot_glob))

    ld = OralMBSNPLoader()
    ld._gen_species_set_maf_annot_data(list(pending), min_reads_per_snp=1,
                                       min_samples_per_snp_cached=100)
    # TODO: make sure the gene annotation loader is using the OralMBLoader and not the Gut


sethandlers(file_dir='/home/saarsh/Analysis/antibiotics/jobs/')
os.chdir('/home/saarsh/Analysis/antibiotics/jobs/')

with qp(jobname='annot', _delete_csh_withnoerr=True, q=['himem7.q']) as q:
    q.startpermanentrun()
    handles = {0: q.method(func)}
    for key, handle in handles.items():
        q.waitforresult(handle)
jobs_dir = '/net/mraid08/export/jafar/Microbiome/Analyses/saar/NLDcopmJobs'


def func(folder):
    """Delete *folder* recursively, in the background, via the shell.

    NOTE(review): the command string is built from `folder` and run through
    a shell — acceptable for trusted glob-derived paths, but never feed
    untrusted input here.
    """
    cmd = f'rm -Rf {folder} &'
    print(cmd)
    _shell_command(cmd)
    # Earlier (disabled) variant compressed files instead of deleting:
    # files = glob.glob(os.path.join(folder, '*'))
    # len_files = len(files)
    # for i_file, file in enumerate(files):
    #     if not os.path.isdir(file):
    #         print(f'file {i_file + 1}/{len_files}')
    #         _shell_command('gzip -9 ' + file)


# queue
os.chdir(jobs_dir)
sethandlers(file_dir=jobs_dir)

# NOTE(review): `folders` is expected to be defined earlier in the file —
# verify it is populated before this block runs.
with qp(jobname='NLDcomp', _mem_def='10G', _tryrerun=False) as q:
    q.startpermanentrun()
    handles = {}
    for idx, folder in enumerate(folders):
        handles[idx] = q.method(func, [folder])
    for _, handle in handles.items():
        q.waitforresult(handle)
# annotate def run_prokka(input_fasta): shell_command(prokka_cmd.format(os.path.join(os.path.dirname(input_fasta), prokka_version), input_fasta)) def run_eggnog(input_fasta): eggnog_dir = os.path.join(os.path.dirname(os.path.dirname(input_fasta)), eggnog_version) os.mkdir(eggnog_dir) shell_command(eggnog_cmd.format(input_fasta, os.path.join(eggnog_dir, 'eggnog'))) os.chdir(jobs_dir) sethandlers(file_dir=jobs_dir) # run prokka with qp(jobname='prokka', _mem_def='4G', _trds_def=2, _tryrerun=True, _specific_nodes='plink') as q: q.startpermanentrun() tkttores = {} rep_paths = glob.glob(os.path.join(output_dir, '*', '*.fa')) for rep_path in rep_paths: tkttores[rep_path] = q.method(run_prokka, [rep_path]) for k, v in tkttores.items(): q.waitforresult(v) # run eggnog with qp(jobname='eggnog', _mem_def='40G', _trds_def=16, _tryrerun=True, _specific_nodes='plink') as q: q.startpermanentrun() tkttores = {}
from LabQueue.qp import qp, fakeqp
from LabUtils.addloglevels import sethandlers

# ---- parameters ------------------------------------------------------------
output_dir = '/net/mraid08/export/genie/LabData/Analyses/saarsh/anti_mwas_processed/annotations'
x_mwas_files_path = '/net/mraid08/export/genie/LabData/Analyses/saarsh/anti_mwas_raw/mb_gwas_SGB_*.h5'
y_mwas_files_path = '/net/mraid08/export/genie/LabData/Analyses/saarsh/anti_mwas_processed/*/SGB_*.h5'
mwas_file_path = os.path.join(output_dir, 'snps_codons.h5')
jobs_path = '/net/mraid08/export/jafar/Microbiome/Analyses/saar/antibiotics/jobs'

# ---- run -------------------------------------------------------------------
# NOTE(review): relies on `os` and `mwas_annot` being imported earlier in
# the file.
os.chdir(jobs_path)
sethandlers()

with qp(jobname='annot', _delete_csh_withnoerr=True, q=['himem7.q'],
        max_r=1, _mem_def='20G') as q:
    q.startpermanentrun()
    # Earlier (disabled) step, kept for reference:
    # snps_unique = q.method(mwas_annot.find_unique_snps,
    #                        (x_mwas_files_path, y_mwas_files_path, output_dir, 'Pval', 0.05/26068850133))
    # q.waitforresult(snps_unique)
    snps = q.method(mwas_annot.run, (mwas_file_path, output_dir))
    q.waitforresult(snps)
# Figure generation for the between-species MWAS results.
run_type = 'between_species'
input_dir = os.path.join(
    '/net/mraid08/export/genie/LabData/Analyses/saarsh/anti_mwas_processed', run_type)
output_dir = os.path.join(
    '/net/mraid08/export/jafar/Microbiome/Analyses/saar/antibiotics/figs/new', run_type)
jobs_dir = '/net/mraid08/export/jafar/Microbiome/Analyses/saar/antibiotics/jobs/'

# queue
# Logging handlers write into the jobs directory, so move there first.
os.chdir(jobs_dir)
sethandlers(file_dir=jobs_dir)

with qp(jobname=run_type, _tryrerun=True) as q:
    q.startpermanentrun()
    tkttores = {}
    print('start sending jobs')
    # One job per per-SGB result file.
    # NOTE(review): this chunk is truncated — the kwargs dict and the loop
    # body continue past the visible end of this section.
    for file in glob.glob(os.path.join(input_dir, 'SGB_*.h5')):  # 9710, 10068
        kwargs = {
            'mwas_fname': file,
            'out_dir': output_dir,
            'manhattan_draw_func': color_by_coef,
            'manhattan_text_func': text_func_annotated_between if run_type == 'between_species' else text_func_annotated_within,