def __init__(self, cmd, path_stdout=None, logger=False, jobname='NoJobName'):
    """Prepare a subprocess launcher for *cmd* (stored, not executed here).

    Parameters
    ----------
    cmd : str
        Shell command to be run later by the launcher.
    path_stdout : str, optional
        Directory for stdout/log files; defaults to the current working
        directory *at call time*. Checked for writability via
        HelperUtils.check_if_writable.
    logger : optional
        Existing logger object to reuse. If falsy, a fresh timestamped
        Logger is created inside path_stdout.
    jobname : str, optional
        Human-readable job label.
    """
    # BUGFIX: the previous default `path_stdout=os.getcwd()` was evaluated
    # ONCE at import time, freezing whatever directory the module happened
    # to be imported from. Resolve the default at call time instead.
    if path_stdout is None:
        path_stdout = os.getcwd()
    self.path_stdout = HelperUtils.check_if_writable(path_stdout)
    if logger:  # TODO: check that logger is of class Logger?
        self.logger = logger
    else:
        # Create a new logger, named e.g.
        # LaunchSubprocess_NoLoggerParsed_2014-05-15_23.08.59.log
        self.logger = Logger(name=self.__class__.__name__ + "_NoLoggerParsed" + HelperUtils.gen_timestamp(), log_dir=path_stdout, log_format=1, enabled=True).get()
    self.jobname = jobname
    self.cmd = cmd
def submit():
    """Submit one SNPsnap query job per snplist .txt file via bsub (LSF).

    Reads module-level globals: path_snplist, path_output_sub, path_stdout,
    script2call, queue_name, walltime, mem, email, logger, args.

    Returns
    -------
    list
        The LaunchBsub wrappers, after calling .run() on each with
        args.pause seconds between submissions.
    """
    files = glob.glob(path_snplist + '/*.txt')  # OBS: folder also contains "not_mapped.log"
    files.sort()
    n_files = len(files)  # hoisted: invariant over the loop
    processes = []
    for counter, filename in enumerate(files, start=1):
        # phenotype name = snplist file name without its extension
        pheno = os.path.splitext(os.path.basename(filename))[0]
        logger.info("processing file #%d/#%d: %s" % (counter, n_files, pheno))
        output_dir = path_output_sub + "/" + pheno
        HelperUtils.mkdirs(output_dir)
        # TODO: consider the potential problems with 'use' environment
        # TODO: reuse Python-2.7 && bsub [...] -- put this inside LaunchBsub!!
        command_shell = "python {program:s} --user_snps_file {snplist:s} --output_dir {outputdir:s} --distance_type ld --distance_cutoff 0.5 match --N_sample_sets {N} --max_freq_deviation {freq} --max_distance_deviation {dist} --max_genes_count_deviation {gene_count}".format(program=script2call, snplist=filename, outputdir=output_dir, N=10000, freq=1, dist=5, gene_count=5)
        # NOTE(review): projectname='snpsnp' looks like a typo for 'snpsnap';
        # left unchanged because it is a runtime value -- confirm before fixing.
        processes.append(LaunchBsub(cmd=command_shell, queue_name=queue_name, walltime=walltime, mem=mem, jobname=pheno, projectname='snpsnp', path_stdout=path_stdout, file_output=pheno + '.txt', no_output=False, email=email, logger=logger))
    # Removed unused local `user_snps_file` (assigned, never read).
    for p in processes:
        p.run()
        time.sleep(args.pause)  # throttle submissions; pause is user-configurable
    return processes
def submit():
    """Submit one SNPsnap query per snplist .txt file as a local subprocess.

    Reads module-level globals: path_snplist, path_output_sub, path_stdout,
    script2call, logger, and the match parameters N_sample_sets, freq, dist,
    gene_count.

    Returns
    -------
    list
        The LaunchSubprocess wrappers, after starting each via run_Pipe().
    """
    files = glob.glob(path_snplist + '/*.txt')  # OBS: folder also contains "not_mapped.log"
    files.sort()
    n_files = len(files)  # hoisted: invariant over the loop
    processes = []
    for counter, filename in enumerate(files, start=1):
        # Strip parentheses from the path string before building the command.
        # NOTE(review): this rewrites only the *string*, not the file on disk;
        # if a file actually contains '(' or ')' the command below will point
        # at a non-existent path -- confirm files were pre-renamed upstream.
        filename = re.sub(r'[()]', '', filename)  #### OBS: changing file names!
        pheno = os.path.splitext(os.path.basename(filename))[0]
        logger.info("processing file #%d/#%d: %s" % (counter, n_files, pheno))
        output_dir = path_output_sub + "/" + pheno
        HelperUtils.mkdirs(output_dir)
        command_shell = "python {program:s} --user_snps_file {snplist:s} --output_dir {outputdir:s} --distance_type ld --distance_cutoff 0.5 match --N_sample_sets {N} --max_freq_deviation {freq} --max_distance_deviation {dist} --max_genes_count_deviation {gene_count}".format(program=script2call, snplist=filename, outputdir=output_dir, N=N_sample_sets, freq=freq, dist=dist, gene_count=gene_count)
        processes.append(LaunchSubprocess(cmd=command_shell, path_stdout=path_stdout, logger=logger, jobname=pheno))
        # alternative (disabled): p.run_Log(file_output=pheno+'.txt') writes
        # stdout+stderr to a file in path_stdout -- no waiting, output to file.
    # Removed unused local `user_snps_file` (assigned, never read).
    for p in processes:
        p.run_Pipe()  # stdout/stderr go to a pipe; no waiting here
    return processes
###################################### Global params ###################################### queue_name = "hour" # [bhour, bweek] priority #queue_name = "priority" # [bhour, bweek] priority walltime="59" # hh:mmm, e.g. [24:00=1day | 10:00=10hrs | 120=2hrs | 1:0=1hrs mem="1" # gb #email='*****@*****.**' email=False script2call = "/home/unix/ptimshel/git/snpsnap/snpsnap_query.py" # Updated path current_script_name = os.path.basename(__file__).replace('.py','') path_snplist = "/cvar/jhlab/snpsnap/data/input_lists/gwascatalog_140201_listsBIGbim" path_output_main = "/cvar/jhlab/snpsnap/data/query/gwascatalog" path_output_sub = path_output_main + "/output" HelperUtils.mkdirs(path_output_sub) path_stdout = path_output_main + "/stdout" HelperUtils.mkdirs(path_stdout) ###################################### ARGUMENTS ###################################### args = ParseArguments() ###################################### LOGGER ###################################### ## Setup-logger #logger = Logger(__name__, path_stdout).get() # gives __name__ == main logger = Logger(current_script_name, path_stdout).get() #loglevel = getattr(logging, args.logger_lvl.upper(), logging.INFO) # returns some numeric value loglevel = getattr(logging, args.logger_lvl.upper()) # returns some numeric value logger.setLevel(loglevel) #logger.setLevel(logging.INFO) #logger.setLevel(logging.WARNING)
def __init__(self, cmd, queue_name, mem, shell_script_path, parallel=False, num_parallel=None, proc=None, shared_mem=None, runlimit=None, app=None, jobname='NoJobName', projectname='NoProjectName', path_stdout=None, join_outputs='yes', file_output=None, no_output=False, email=None, email_status_notification=False, email_report=False, logger=False, cmd_custom=None):
    """Build (but do not execute) a qsub submission command for *cmd*.

    The finished command line ends up in self.call; self.bcmd holds the
    qsub prefix and self.cmd the program to run.

    Parameters
    ----------
    cmd : str
        Program/command to run on the cluster; appended after the qsub options.
    queue_name : str
        Queue to submit to (-q).
    mem : str or int
        Memory in GB, used as -l m_mem_free=<mem>g.
    shell_script_path : str
        Wrapper shell script passed to qsub.
    parallel, num_parallel
        If parallel is true, requests '-pe smp <num_parallel>'
        (num_parallel must then be convertible to int, else ValueError/TypeError).
    proc, runlimit, app
        Optional -n / -W / -app values, appended only when truthy.
    shared_mem : bool, optional
        If true, appends "-R 'span[hosts=1]'".
        NOTE(review): span[hosts=1] is LSF resource-request syntax being
        appended to a qsub (SGE) command -- confirm the target scheduler
        accepts it.
    jobname, projectname : str
        Job label (-N) and project label (stored; not placed in the command).
    path_stdout : str, optional
        Directory for the job output file; defaults to the current working
        directory at call time.
    join_outputs : str
        'yes'/'no', translated to qsub's -j y / -j n.
    file_output : str, optional
        Output file name inside path_stdout; auto-generated from the job
        number and jobname when None.
    no_output : bool
        If true, job output is discarded to /dev/null (overrides file_output).
    email, email_status_notification, email_report
        Optional -u <email> / -B / -N flags.
    logger
        Existing logger to reuse; a fresh timestamped one is created if falsy.
    cmd_custom : str, optional
        If given, used verbatim as self.call instead of the built command
        (a warning is logged).
    """
    # BUGFIX: default `path_stdout=os.getcwd()` was evaluated once at import
    # time; resolve the default at call time instead.
    if path_stdout is None:
        path_stdout = os.getcwd()
    LaunchQsub.LB_job_counter += 1  # class-wide counter of constructed jobs
    self.job_number = LaunchQsub.LB_job_counter
    self.path_stdout = HelperUtils.check_if_writable(path_stdout)
    if logger:  # TODO: check that logger is of class Logger?
        self.logger = logger
    else:
        # Create a new logger, e.g. LaunchQsub_NoLoggerParsed_2014-05-15_23.08.59.log
        self.logger = Logger(name=self.__class__.__name__ + "_NoLoggerParsed" + HelperUtils.gen_timestamp(), log_dir=path_stdout, log_format=1, enabled=True).get()
    # Resolve the stdout/stderr target file (merged the two identical
    # os.path.join branches of the original elif/else).
    if no_output:
        self.file_output = '/dev/null'  # discard job output entirely
    else:
        if file_output is None:
            file_output = "qsub_outfile_ID{job_number}_{jobname}.{ext}".format(job_number=self.job_number, jobname=jobname, ext='out')
        self.file_output = os.path.join(self.path_stdout, file_output)
    self.jobname = jobname
    self.projectname = projectname
    self.status = ""
    self.attempts = 0
    # TODO: overwrite output files with -oo?
    # TODO: self.mem_per_process -- a per-process (soft) memory limit
    # TODO: span[ptile=value] -- exact number of job slots per host
    #       (does this option exist on the Broad LSF 7.0.6?)
    # TODO: "-w" option to qsub to set up job dependencies
    # TODO: I/O resource requests --> qsub -R "rusage[indium_io=3]"
    #       ("df -k ." gets the file system; /broad/tools/scripts/io_resource_for_file .)
    # TODO: exclusive access to the nodes (bsub -x)
    ## qsub resource parameters
    self.p_queue_name = queue_name  # string, e.g. "hour"
    self.p_mem = mem  # in GB
    self.p_proc = proc  # slot count for -n
    self.p_runlimit = runlimit  # wall-clock limit for -W
    self.p_shared_mem = shared_mem  # boolean value
    self.p_app = app  # -app profile
    self.shell_script_path = shell_script_path
    if parallel:
        parallel_processing = '-pe smp %d' % int(num_parallel)
    else:
        parallel_processing = ''
    self.join_outputs = join_outputs  # keep the caller's 'yes'/'no' for reference
    # Translate to qsub's -j flag values.
    if join_outputs == 'no':
        join_outputs = 'n'
    elif join_outputs == 'yes':
        join_outputs = 'y'
    self.cmd = cmd  # the command/program to run; appended to the qsub command
    # NOTE(review): dropped the unused `project=` format argument -- the
    # template has no {project} placeholder, so str.format silently ignored it.
    self.bcmd = "qsub -cwd -N {jobname} -o {output} -j {join_outputs} -q {queue} -l m_mem_free={mem}g -b n {parallel_processing} {shell_script_path}".format(jobname=self.jobname, join_outputs=join_outputs, output=self.file_output, queue=self.p_queue_name, mem=self.p_mem, parallel_processing=parallel_processing, shell_script_path=self.shell_script_path)
    # Optional flags, appended one at a time when set.
    if self.p_shared_mem:
        addon = "-R 'span[hosts=1]'"  # OBS: remember the QUOTES!
        self.bcmd = "{base} {addon}".format(base=self.bcmd, addon=addon)
    if self.p_proc:
        self.bcmd = "{base} {addon}".format(base=self.bcmd, addon="-n {}".format(self.p_proc))
    if self.p_runlimit:
        self.bcmd = "{base} {addon}".format(base=self.bcmd, addon="-W {}".format(self.p_runlimit))
    if self.p_app:
        self.bcmd = "{base} {addon}".format(base=self.bcmd, addon="-app {}".format(self.p_app))
    if email:
        # *Question: can I get the report in both an email and the stdout file? ANSWER ---> NO!
        self.bcmd = "{base} {addon}".format(base=self.bcmd, addon="-u {}".format(email))
    if email_status_notification:
        # -B: email the submitter when the job is dispatched and begins running
        self.bcmd = "{base} {addon}".format(base=self.bcmd, addon="-B")
    if email_report:
        # -N: send the job report by email, separate from the job output
        self.bcmd = "{base} {addon}".format(base=self.bcmd, addon="-N")
    ### GENERATING CALL
    if not cmd_custom:
        self.call = self.bcmd + " " + self.cmd
    else:
        self.logger.warning("OBS: custom command parsed to LaunchQsub(). The command is: %s" % cmd_custom)
        self.call = cmd_custom