def submit():
    """Start one `script2call` run per SNP-list file and return the launchers.

    Every ``*.txt`` file directly under ``path_snplist`` is handled in sorted
    order. For each file, an output directory named after the file (extension
    removed, parentheses stripped) is created under ``path_output_sub`` and a
    ``LaunchSubprocess`` object is built for the matching command line. Only
    after all objects have been created is ``run_Pipe()`` called on each one.

    Returns:
        list: the ``LaunchSubprocess`` objects, in sorted-file order.

    NOTE(review): this file defines ``submit()`` more than once, so a later
    definition rebinds the name -- confirm which launcher variant the script
    is meant to use.
    """
    # Invariant part of the command line; only the paths change per file.
    command_template = (
        "python {program:s}"
        " --user_snps_file {snplist:s}"
        " --output_dir {outputdir:s}"
        " --distance_type ld"
        " --distance_cutoff 0.5"
        " match"
        " --N_sample_sets {N}"
        " --max_freq_deviation {freq}"
        " --max_distance_deviation {dist}"
        " --max_genes_count_deviation {gene_count}"
    )

    # Only *.txt is globbed: the folder also contains "not_mapped.log".
    snplist_paths = sorted(glob.glob(path_snplist + '/*.txt'))
    n_files = len(snplist_paths)

    launchers = []
    for index, globbed_path in enumerate(snplist_paths, start=1):
        # OBS: this changes the file name! Parentheses are removed from the
        # whole path, so the path handed to the script can differ from the
        # one that glob returned.
        cleaned_path = re.sub(r'[()]', '', globbed_path)

        base_name = os.path.basename(cleaned_path)
        pheno, _extension = os.path.splitext(base_name)

        # Single parenthesised argument: prints the same text under the
        # Python 2 print statement.
        print("processing file #%d/#%d: %s" % (index, n_files, pheno))

        output_dir = "/".join((path_output_sub, pheno))
        HelperUtils.mkdirs(output_dir)

        command_shell = command_template.format(
            program=script2call,
            snplist=cleaned_path,  # full path
            outputdir=output_dir,
            N=10000,
            freq=2,
            dist=5,
            gene_count=5,
        )

        launcher = LaunchSubprocess(
            cmd=command_shell,
            logdir=log_dir_path,
            log_root=current_script_name,
            file_output=pheno + '.txt',
            jobname=pheno,
        )
        launchers.append(launcher)

    # Start everything only once the full list has been assembled.
    for launcher in launchers:
        launcher.run_Pipe()

    return launchers
def submit():
    """Create and run one ``LaunchBsub`` job per SNP-list file.

    Every ``*.txt`` file directly under ``path_snplist`` is handled in sorted
    order. For each file, an output directory named after the file (extension
    removed) is created under ``path_output_sub`` and a ``LaunchBsub`` object
    is built for the matching command line, using the module-level
    ``queue_name``, ``walltime``, ``mem`` and ``email`` settings. Only after
    all objects have been created is ``run()`` called on each one.

    Returns:
        list: the ``LaunchBsub`` objects, in sorted-file order.

    NOTE(review): this file defines ``submit()`` more than once, so this
    definition shadows any earlier one -- confirm which launcher variant the
    script is meant to use.
    """
    # Invariant part of the command line; only the paths change per file.
    # TODO: consider the potential problems with 'use' environment
    command_template = (
        "python {program:s}"
        " --user_snps_file {snplist:s}"
        " --output_dir {outputdir:s}"
        " --distance_type ld"
        " --distance_cutoff 0.5"
        " match"
        " --N_sample_sets {N}"
        " --max_freq_deviation {freq}"
        " --max_distance_deviation {dist}"
        " --max_genes_count_deviation {gene_count}"
    )

    # Only *.txt is globbed: the folder also contains "not_mapped.log".
    snplist_paths = sorted(glob.glob(path_snplist + '/*.txt'))
    n_files = len(snplist_paths)

    jobs = []
    for index, snplist_path in enumerate(snplist_paths, start=1):
        # The globbed path is used unmodified; the parenthesis-stripping step
        # is disabled in this variant.
        base_name = os.path.basename(snplist_path)
        pheno, _extension = os.path.splitext(base_name)

        # Single parenthesised argument: prints the same text under the
        # Python 2 print statement.
        print("processing file #%d/#%d: %s" % (index, n_files, pheno))

        output_dir = "/".join((path_output_sub, pheno))
        HelperUtils.mkdirs(output_dir)

        command_shell = command_template.format(
            program=script2call,
            snplist=snplist_path,  # full path
            outputdir=output_dir,
            N=1000,
            freq=5,
            dist=20,
            gene_count=20,
        )

        job = LaunchBsub(
            cmd=command_shell,
            queue_name=queue_name,
            walltime=walltime,
            mem=mem,
            jobname=pheno,
            projectname='snpsnp',
            logdir=log_dir_path,
            log_root=current_script_name,
            file_output=pheno + '.txt',
            no_output=False,
            email=email,
        )
        jobs.append(job)

    # Run everything only once the full list has been assembled.
    for job in jobs:
        job.run()

    return jobs
################ Constants ############ script2call = "/home/unix/ptimshel/git/snpsnap/snpsnap_query.py" # Updated path current_script_name = os.path.basename(__file__).replace('.py','') path_snplist = "/cvar/jhlab/snpsnap/data/input_lists/gwascatalog_140201_listsBIGbim" #path_snplist = "/home/projects/tp/childrens/snpsnap/data/gwas/gwascatalog_140201_listsBIGbim" #path_snplist = "/home/projects/tp/childrens/snpsnap/data/gwas/gwascatalog_140201_lists" #path_output_main = "/home/projects/tp/childrens/snpsnap/data/query/gwascatalog" path_output_main = "/cvar/jhlab/snpsnap/data/query/gwascatalog" path_output_sub = path_output_main + "/subprocess_output" HelperUtils.mkdirs(path_output_sub) log_dir_path = path_output_main + "/subprocess_log" HelperUtils.mkdirs(log_dir_path) processes = submit() # run_Pipe() method calls for p in processes: p.get_pid() with open(path_output_main+'/subprocess_gwastable.tab', 'w') as f: for p in processes: lines = p.process_communicate_and_read_pipe_lines() for (i, line) in enumerate(lines): #print line if "# rating_few_matches" in line: