def main(argv = None):
    """Command-line entry point of the 4C-seq workflow.

    Parses the options, opens the MiniLIMS given with ``-d``, loads the job
    either from the frontend (``-k``) or from a configuration file (``-c``),
    runs the workflow inside an execution, then writes ``<key>.bed`` (UCSC
    track headers) and ``<key>.done`` (JSON list of result files) and sends
    a notification e-mail when the globals contain an ``'email'`` entry.

    :param argv: full argument vector, program name first; defaults to
        ``sys.argv``.
    :returns: ``2`` after reporting a ``Usage`` error.  On success, and for
        ``--help``, the function does not return: it calls ``sys.exit(0)``.
    :raises ValueError: if neither a job key nor an existing configuration
        file was given.
    """
    via = "lsf"
    limspath = None
    ms_limspath = "/data/htsstation/mapseq/mapseq_minilims"
    hts_key = ''            # empty string so that len() below is always valid
    working_dir = None
    config_file = None
    if argv is None:
        argv = sys.argv
    try:
        try:
            # Every long option except --help takes a value, hence the "=".
            opts, _ = getopt.getopt(argv[1:], "hu:k:d:w:m:c:",
                                    ["help", "via=", "key=", "minilims=",
                                     "mapseq_minilims=",
                                     "working-directory=", "config="])
        except getopt.error as msg:
            raise Usage(msg)
        for o, a in opts:
            if o in ("-h", "--help"):
                print(__doc__)
                print(usage)
                sys.exit(0)
            elif o in ("-u", "--via"):
                if a not in ("local", "lsf"):
                    raise Usage("Via (-u) can only be \"local\" or \"lsf\", got %s." % (a,))
                via = a
            elif o in ("-w", "--working-directory"):
                if not os.path.exists(a):
                    raise Usage("Working directory '%s' does not exist." % a)
                os.chdir(a)
                working_dir = a
            elif o in ("-d", "--minilims"):
                limspath = a
            elif o in ("-m", "--mapseq_minilims"):
                ms_limspath = a
            elif o in ("-k", "--key"):
                hts_key = a
            elif o in ("-c", "--config"):
                config_file = a
            else:
                raise Usage("Unhandled option: " + o)

        M = MiniLIMS(limspath)
        if len(hts_key) > 1:
            gl = use_pickle(M, "global variables")
            htss = frontend.Frontend(url=gl['hts_4cseq']['url'])
            job = htss.job(hts_key)
        elif config_file and os.path.exists(config_file):
            (job, gl) = frontend.parseConfig(config_file)
            # Without a frontend key the job description names the outputs,
            # as the sibling ChIP-seq and mapseq entry points do.
            hts_key = job.description
        else:
            raise ValueError("Need either a job key (-k) or a configuration file (-c).")

        mapseq_url = None
        if 'hts_mapseq' in gl:
            mapseq_url = gl['hts_mapseq']['url']
        job.options['ucsc_bigwig'] = True
        g_rep = genrep.GenRep(gl["genrep_url"], gl.get("bwt_root"))
        # NOTE(review): the assembly object is not used below; the call is
        # kept in case it validates the assembly id -- confirm.
        assembly = g_rep.assembly(job.assembly_id)
        primers_file = '/scratch/cluster/monthly/htsstation/4cseq/'+str(job.id)+'/primers.fa'
        primers_dict = c4seq.loadPrimers(primers_file)

        with execution(M, description=hts_key,
                       remote_working_directory=working_dir) as ex:
            (mapseq_files, job) = mapseq.get_bam_wig_files(
                ex, job, ms_limspath, mapseq_url, suffix=['merged'],
                script_path=gl['script_path'], via=via)
            c4seq.workflow_groups(ex, job, primers_dict, g_rep, mapseq_files,
                                  mapseq_url, gl['script_path'])

        # One UCSC track header per file tagged ucsc=1.
        ucscfiles = common.get_files(ex.id, M, select_param={'ucsc':'1'})
        with open(hts_key+".bed", 'w') as ucscbed:
            for ftype, fset in ucscfiles.iteritems():
                for ffile, descr in fset.iteritems():
                    ucscbed.write(common.track_header(
                        descr, ftype, gl['hts_4cseq']['download'], ffile))

        # NOTE: GDV project creation/upload is currently disabled here.
        allfiles = common.get_files(ex.id, M)
        print(json.dumps(allfiles))
        with open(hts_key+".done", 'w') as done:
            json.dump(allfiles, done)

        if 'email' in gl:
            r = email.EmailReport(sender=gl['email']['sender'],
                                  to=str(job.email),
                                  subject="4cseq job "+str(job.description),
                                  smtp_server=gl['email']['smtp'])
            r.appendBody(''' Your 4C-seq job is finished. The description was: '''+str(job.description)+''' and its unique key is '''+hts_key+'''. You can retrieve the results at this url: '''+gl['hts_4cseq']['url']+"jobs/"+hts_key+"/get_results")
            r.send()
        sys.exit(0)
    except Usage as err:
        # Same reporting convention as the RNA-seq entry point.
        sys.stderr.write("%s\n" % err.msg)
        sys.stderr.write("%s\n" % usage)
        return 2
def main(argv = None):
    """Command-line entry point of the ChIP-seq workflow.

    Parses the options, loads the job from the frontend (``-k``) or from a
    configuration file (``-c``), fetches the mapseq BAM/wig files, runs
    ``workflow_groups`` inside an execution, optionally adds the sql files
    as tracks to the job's GDV project, writes ``<key>.log`` and
    ``<key>.done`` and sends a notification e-mail when the globals contain
    an ``'email'`` entry.

    :param argv: full argument vector, program name first; defaults to
        ``sys.argv``.
    :returns: ``0`` on success (and for ``--help``), ``2`` after reporting
        a ``Usage`` error.
    """
    via = "lsf"
    limspath = None
    ms_limspath = "/data/htsstation/mapseq/mapseq_minilims"
    hts_key = ''
    working_dir = None
    config_file = None
    if argv is None:
        argv = sys.argv
    try:
        try:
            opts, _ = getopt.getopt(argv[1:], "hu:k:d:w:m:c:",
                                    ["help", "via=", "key=", "minilims=",
                                     "mapseq_minilims=",
                                     "working-directory=", "config="])
        except getopt.error as msg:
            raise Usage(msg)
        for o, a in opts:
            if o in ("-h", "--help"):
                print(__doc__)
                print(usage)
                return 0
            elif o in ("-u", "--via"):
                if a not in ("local", "lsf"):
                    raise Usage("Via (-u) can only be \"local\" or \"lsf\", got %s." % (a,))
                via = a
            elif o in ("-w", "--working-directory"):
                if not os.path.exists(a):
                    raise Usage("Working directory '%s' does not exist." % a)
                os.chdir(a)
                working_dir = a
            elif o in ("-d", "--minilims"):
                limspath = a
            elif o in ("-m", "--mapseq_minilims"):
                ms_limspath = a
            elif o in ("-k", "--key"):
                hts_key = a
            elif o in ("-c", "--config"):
                config_file = a
            else:
                raise Usage("Unhandled option: " + o)

        # hts_key defaults to '' (never None), so test what is actually
        # usable: a key longer than one character or an existing config.
        has_key = len(hts_key) > 1
        has_config = bool(config_file) and os.path.exists(config_file)
        if not (limspath and os.path.exists(limspath)
                and (has_key or has_config)):
            raise Usage("Need a minilims and a job key or a configuration file")

        M = MiniLIMS(limspath)
        if has_key:
            gl = use_pickle(M, "global variables")
            htss = frontend.Frontend(url=gl['hts_chipseq']['url'])
            job = htss.job(hts_key)
            # Drop failed executions previously recorded under this key.
            for failed in M.search_executions(with_description=hts_key, fails=True):
                M.delete_execution(failed)
        else:
            (job, gl) = frontend.parseConfig(config_file)
            hts_key = job.description

        mapseq_url = None
        if 'hts_mapseq' in gl:
            mapseq_url = gl['hts_mapseq']['url']
        job.options['ucsc_bigwig'] = True
        g_rep = genrep.GenRep(gl["genrep_url"], gl.get("bwt_root"))
        assembly = g_rep.assembly(job.assembly_id)
        script_path = gl.get('script_path') or ''

        # The context manager closes the log even if the workflow raises.
        with open(hts_key+".log", 'w') as logfile:
            with execution(M, description=hts_key,
                           remote_working_directory=working_dir) as ex:
                logfile.write("Enter execution, fetch bam and wig files.\n")
                logfile.flush()
                (mapped_files, job) = mapseq.get_bam_wig_files(
                    ex, job, minilims=ms_limspath, hts_url=mapseq_url,
                    script_path=script_path, via=via)
                logfile.write("Starting workflow.\n")
                logfile.flush()
                workflow_groups(ex, job, mapped_files, assembly.chromosomes,
                                script_path, g_rep, logfile=logfile, via=via)

            allfiles = get_files(ex.id, M)
            if 'gdv_project' in job.options and 'sql' in allfiles:
                logfile.write("Adding to GDV project.\n")
                logfile.flush()
                allfiles['url'] = {job.options['gdv_project']['public_url']: 'GDV view'}
                download_url = gl['hts_chipseq']['download']
                for k, f in allfiles['sql'].iteritems():
                    gdv.add_gdv_track(gl['gdv']['key'], gl['gdv']['email'],
                                      job.options['gdv_project']['project_id'],
                                      url=download_url+str(k),
                                      name=re.sub(r'\.sql', '', str(f)),
                                      gdv_url=gl['gdv']['url'])

        print(json.dumps(allfiles))
        with open(hts_key+".done", 'w') as done:
            json.dump(allfiles, done)

        if 'email' in gl:
            r = email.EmailReport(sender=gl['email']['sender'],
                                  to=str(job.email),
                                  subject="Chipseq job "+str(job.description),
                                  smtp_server=gl['email']['smtp'])
            r.appendBody(''' Your chip-seq job has finished. The description was: '''+str(job.description)+''' and its unique key is '''+hts_key+'''. You can retrieve the results at this url: '''+gl['hts_chipseq']['url']+"jobs/"+hts_key+"/get_results")
            r.send()
        return 0
    except Usage as err:
        # Same reporting convention as the RNA-seq entry point.
        sys.stderr.write("%s\n" % err.msg)
        sys.stderr.write("%s\n" % usage)
        return 2
def main(argv=None):
    """Command-line entry point for listing/retrieving files of an execution.

    Depending on the options it either dumps the programs (arguments,
    stdout, stderr) recorded for an execution, or walks the files returned
    by ``get_files`` and lists, copies and/or symlinks them, optionally
    uploading them as tracks to a GDV project.

    :param argv: full argument vector, program name first; ``None`` lets
        optparse read ``sys.argv``.
    :returns: ``0`` on success, ``2`` after reporting a ``Usage`` error.
    """
    parser = None
    try:
        parser = optparse.OptionParser(usage=usage, description=description)
        for opt in opts:
            parser.add_option(opt[0], opt[1], help=opt[2], **opt[3])
        (options, _) = parser.parse_args(None if argv is None else argv[1:])

        if options.minilims in module_list:
            M = MiniLIMS(os.path.join(options.basepath, options.minilims+"_minilims"))
        elif options.minilims and os.path.exists(options.minilims):
            M = MiniLIMS(options.minilims)
        else:
            raise Usage("Minilims not found, please specify a path or a module with -m.")

        # "k=v,..." or "k:v,..." becomes a dict; otherwise tags stay a list.
        tags = options.tag
        if tags:
            tags = tags.split(",")
            for sep in ("=", ":"):
                if sep in tags[0]:
                    tags = dict(x.split(sep, 1) for x in tags)
                    break
        if options.gdv:
            if not tags:
                tags = {'type': 'sql'}
            elif isinstance(tags, dict):
                tags['type'] = 'sql'
            else:
                # A bare list of tags cannot carry the type=sql restriction.
                raise Usage("Tags must be given as key=value pairs when using the GDV option.")

        if options.programs:
            if isinstance(options.execution, basestring):
                exlist = M.search_executions(with_description=options.execution)
                if len(exlist) == 0:
                    exlist = M.search_executions(with_text=options.execution)
                if len(exlist) == 0:
                    raise Usage("Execution with key %s not found in %s."
                                % (options.execution, options.minilims))
                exid = max(exlist)
            else:
                exid = int(options.execution or 0)
            exec_data = M.fetch_execution(exid)['programs']
            outfile = open(options.output, "w") if options.output else sys.stdout
            try:
                for prog in exec_data:
                    pargs = prog['arguments']
                    # Skip programs in which no tag occurs in any argument.
                    if tags and all(t not in x for x in pargs for t in tags):
                        continue
                    stout = prog['stdout'].encode('ascii', 'ignore')
                    sterr = prog['stderr'].encode('ascii', 'ignore')
                    if pargs[0] == 'bsub':
                        pargs = str(pargs[-1])
                    else:
                        pargs = str(" ".join(pargs))
                    outfile.write("\n".join([pargs, stout, '', sterr, '', '-'*40, '']))
            finally:
                # Never close the interpreter's stdout.
                if outfile is not sys.stdout:
                    outfile.close()
            return 0

        files = get_files(options.execution, M, select_param=tags)
        fprefix = ''
        outfile = None
        if options.list:
            if options.output and os.path.isdir(options.output):
                options.output = os.path.join(options.output, options.execution+".txt")
            outfile = open(options.output, "w") if options.output else sys.stdout
        else:
            if not options.output:
                options.output = "./"
            if not os.path.isdir(options.output):
                # Not a directory: use the last path component as a prefix.
                options.output, fprefix = os.path.split(options.output)
        gdvpaths = []
        gdvnames = []
        try:
            if options.list:
                outfile.write("\t".join(["type", "group", "name", "path", "comment"])+"\n")
            for t in sorted(files.keys()):
                for k, v in files[t].iteritems():
                    fpath = os.path.join(M.file_path, k)
                    # Description is parsed as "name[key:val,...] (comment)".
                    vv = v.split("[")
                    fname = fprefix+vv.pop(0)
                    comment = ''
                    par_dict = {}
                    if vv:
                        vv = vv[0].split("]")
                        par_dict = dict(x.split(":", 1) for x in vv.pop(0).split(","))
                        if vv:
                            comment = vv[0].strip().strip("()")
                    if not options.admin and par_dict.get('view') == 'admin':
                        continue
                    if options.list:
                        outfile.write("\t".join([t, par_dict.get('groupId', ''),
                                                 fname, fpath, comment])+"\n")
                    if options.copy:
                        shutil.copy(fpath, os.path.join(options.output, fname))
                    if options.symlink:
                        os.symlink(fpath, os.path.join(options.output, fname))
                    if options.gdv:
                        gdvpaths.append(fpath)
                        gdvnames.append(re.sub(r'\.sql.*', '', str(fname)))
        finally:
            if outfile is not None and outfile is not sys.stdout:
                outfile.close()

        if options.gdv:
            from bbcflib import gdv
            gdvurl = options.gdvurl or gdv.default_url
            gdvproject = gdv.get_project(mail=options.email, key=options.key,
                                         project_key=options.gdv)
            if gdvproject.get('project', {}).get('id', 0) > 0:
                try:
                    gdv.multiple_tracks(mail=options.email, key=options.key,
                                        project_id=gdvproject['project']['id'],
                                        urls=gdvpaths, names=gdvnames,
                                        extensions=['sql']*len(gdvpaths),
                                        force=True, serv_url=gdvurl)
                except Exception as err:
                    raise Usage("GDV Tracks Failed: %s\n" % err)
                print(""" *********** GDV project at: *********** %s/public/project?k=%s&id=%s *************************************** """ % (gdvurl, gdvproject['project']['download_key'], gdvproject['project']['id']))
        return 0
    except Usage as err:
        sys.stderr.write("%s\n" % err.msg)
        if parser is not None:
            parser.print_help(sys.stderr)
        return 2
def __call__(self,opts):
    """Run the whole workflow for the job described by ``opts``.

    Changes to the working directory, connects to the module's MiniLIMS,
    loads the job from the frontend key and/or a configuration file,
    builds the assembly, runs ``self.main_func`` inside an execution, adds
    the log files to the LIMS, optionally creates/fills a GDV project,
    writes ``<key>.done`` and sends the notification e-mail.

    :param opts: parsed options; this method reads ``wdir``, ``basepath``,
        ``key``, ``config`` and ``via`` and may overwrite ``key`` and
        ``config``.
    :returns: ``0`` on success.
    :raises Usage: on a missing working directory, missing key/config,
        unresolvable fasta file or failed option check.
    """
    self.opts = opts
    if os.path.exists(self.opts.wdir):
        os.chdir(self.opts.wdir)
    else:
        raise Usage("Working directory '%s' does not exist." %self.opts.wdir)

##### Connect to Minilims, recover global variables, fetch job info
    self.minilims = os.path.join(self.opts.basepath,self.name+"_minilims")
    M = MiniLIMS(self.minilims)
    has_config = bool(self.opts.config) and os.path.exists(self.opts.config)
    # An empty key is as unusable as a missing one.
    if not (self.opts.key or has_config):
        raise Usage("Need a job key or a configuration file")
    if self.opts.key:
        self.globals = use_pickle(M, "global variables")
        # NOTE(review): the frontend URL is read from 'hts_mapseq' although
        # the minilims path is built from self.name -- confirm intended.
        htss = frontend.Frontend( url=self.globals['hts_mapseq']['url'] )
        self.job = htss.job( self.opts.key )
        # Drop failed executions previously recorded under this key.
        for failed in M.search_executions(with_description=self.opts.key,fails=True):
            M.delete_execution(failed)
        if self.job.options.get("config_file"):
            if os.path.exists(self.job.options["config_file"]):
                self.opts.config = os.path.abspath(self.job.options["config_file"])
            elif os.path.exists("config.txt"):
                self.opts.config = os.path.abspath("config.txt")
        if self.opts.config and os.path.exists(self.opts.config):
            # The config file updates the job/globals fetched above.
            (self.job,self.globals) = frontend.parseConfig( self.opts.config,
                                                            self.job, self.globals )
    else:
        (self.job,self.globals) = frontend.parseConfig( self.opts.config )
        self.opts.key = self.job.description

##### Genrep instance
    if 'fasta_file' in self.job.options:
        if os.path.exists(self.job.options['fasta_file']):
            self.job.options['fasta_file'] = os.path.abspath(self.job.options['fasta_file'])
        else:
            # Fall back on a ref_sequence.* file in the working directory;
            # when several exist the last extension tried wins.
            for ext in (".fa",".fa.gz",".tar.gz"):
                if os.path.exists("ref_sequence"+ext):
                    self.job.options['fasta_file'] = os.path.abspath("ref_sequence"+ext)
        if not os.path.exists(self.job.options['fasta_file']):
            raise Usage("Don't know where to find fasta file %s." %self.job.options["fasta_file"])
    g_rep = genrep.GenRep( url=self.globals.get("genrep_url"),
                           root=self.globals.get("bwt_root") )

##### Configure facility LIMS
    if 'lims' in self.globals:
        from bbcflib import daflims
        self.job.dafl = dict((loc,daflims.DAFLIMS( username=self.globals['lims']['user'],
                                                   password=pwd ))
                             for loc,pwd in self.globals['lims']['passwd'].iteritems())

########################################################################
##########################  EXECUTION  #################################
########################################################################
##### Logging
    logfile_name = os.path.abspath(self.opts.key+".log")
    debugfile_name = os.path.abspath(self.opts.key+".debug")
    self.logfile = open(logfile_name,'w')
    self.debugfile = open(debugfile_name,'w')
    try:
        self.debug_write(json.dumps(self.globals)+"\n")
        with execution( M, description=self.opts.key,
                        remote_working_directory=self.opts.wdir ) as ex:
            self.log_write("Enter execution. Current working directory: %s" %ex.working_directory)
            self.job.assembly = genrep.Assembly( assembly=self.job.assembly_id,
                                                 genrep=g_rep,
                                                 fasta=self.job.options.get('fasta_file'),
                                                 annot=self.job.options.get('annot_file'),
                                                 intype=self.job.options.get('input_type_id',0),
                                                 ex=ex, via=self.opts.via,
                                                 bowtie2=self.job.options.get("bowtie2",True) )
##### Check all the options
            if not self.check_options():
                raise Usage("Problem with options %s" %self.opts)
            self.debug_write(json.dumps(self.job.options))
            self.init_files( ex )
##### Run workflow
            self.log_write("Starting workflow.")
            self.main_func(ex,**self.main_args)
##### Add logs to the LIMS in admin mode
            self.logfile.flush()
            self.debugfile.flush()
            log_desc = set_file_descr('logfile.txt', step='log', type='txt', view="admin")
            debug_desc = set_file_descr('debug.txt', step='log', type='txt', view="admin")
            ex.add(logfile_name, description=log_desc)
            ex.add(debugfile_name, description=debug_desc)
##### Create GDV project
            if self.job.options['create_gdv_project']:
                self.gdv_create(ex)

########################################################################
########################  POSTPROCESSING  ##############################
########################################################################
        allfiles = get_files( ex.id, M )
        if self.job.options['create_gdv_project'] and \
                self.job.options['gdv_project'].get('project',{}).get('id',0)>0:
            allfiles['url'] = self.gdv_upload(allfiles.get('sql',{}))
    finally:
        # Close both logs whether or not the workflow succeeded.
        self.logfile.close()
        self.debugfile.close()
    print(json.dumps(allfiles))
    with open(self.opts.key+".done",'w') as done:
        json.dump(allfiles,done)
    self.send_email()
    return 0
def main():
    """Command-line entry point of the RNA-seq workflow.

    Parses the options, loads the job from the frontend (``-k``) or from a
    configuration file (``-c``), aligns the reads de novo when the mapseq
    MiniLIMS option is the string ``"None"`` or otherwise loads existing BAM
    files, runs ``rnaseq.rnaseq_workflow`` inside an execution, optionally
    adds the sql files to the job's GDV project, prints the result files as
    JSON and sends a notification e-mail when the globals contain an
    ``'email'`` entry.

    :returns: ``2`` after reporting a ``Usage`` error.  On success the
        function does not return: it calls ``sys.exit(0)``.
    :raises ValueError: if neither a job key nor an existing configuration
        file was given.
    :raises AssertionError: if no BAM files were found.
    """
    map_args = None # {'bwt_args':["-n",str(3),"-p",str(4),"-d",str(50),"--chunkmbs",str(1024),"-m",str(5)]}
    opts = (("-v", "--via", "Run executions using method 'via' (can be 'local' or 'lsf')",
             {'default': "lsf"}),
            ("-k", "--key", "Alphanumeric key of the new RNA-seq job",
             {'default': None}),
            ("-d", "--minilims", "MiniLIMS where RNAseq executions and files will be stored.",
             {'default': None}),
            ("-m", "--mapseq-minilims",
             "MiniLIMS where a previous Mapseq execution and files has been stored. "
             "Set it to None to align de novo from read files.",
             {'default': "/data/htsstation/mapseq/mapseq_minilims", 'dest':"ms_limspath"}),
            ("-w", "--working-directory", "Create execution working directories in wdir",
             {'default': os.getcwd(), 'dest':"wdir"}),
            ("-c", "--config", "Config file",
             {'default': None}),
            ("-p", "--pileup_level",
             "Target features, inside of quotes, separated by commas. "
             "E.g. 'genes,exons,transcripts'",
             {'default': "genes,exons,transcripts"}))
    # Defined before the try so the Usage handler can always print it.
    usage = "run_rnaseq.py [OPTIONS]"
    desc = ("A High-throughput RNA-seq analysis workflow. It returns a file "
            "containing a column of transcript counts for each given BAM file, "
            "normalized using DESeq's size factors. ")
    try:
        parser = optparse.OptionParser(usage=usage, description=desc)
        for opt in opts:
            parser.add_option(opt[0],opt[1],help=opt[2],**opt[3])
        (opt, args) = parser.parse_args()

        if os.path.exists(opt.wdir):
            os.chdir(opt.wdir)
        else:
            parser.error("Working directory '%s' does not exist." % opt.wdir)
        if not opt.minilims:
            parser.error("Must specify a MiniLIMS to attach to")

        # The pileup levels are needed in both run modes.
        pileup_level = opt.pileup_level.split(',')

        # Rna-seq job configuration
        M = MiniLIMS(opt.minilims)
        if opt.key:
            gl = use_pickle( M, "global variables" )
            htss = frontend.Frontend( url=gl['hts_rnaseq']['url'] )
            job = htss.job(opt.key) # new *RNA-seq* job instance
            # Drop failed executions previously recorded under this key.
            for failed in M.search_executions(with_description=opt.key,fails=True):
                M.delete_execution(failed)
            description = "Job run with mapseq key %s" % opt.key
        elif opt.config and os.path.exists(opt.config):
            (job,gl) = frontend.parseConfig(opt.config)
            description = "Job run with config file %s" % opt.config
        else:
            raise ValueError("Need either a job key (-k) or a configuration file (-c).")
        # Key used in the e-mail; the job description stands in when the run
        # was started from a configuration file.
        job_key = opt.key or str(job.description)

        # NOTE(review): "x or True" is always truthy, so a falsy
        # 'ucsc_bigwig' option is overridden -- confirm this is intended.
        job.options['ucsc_bigwig'] = job.options.get('ucsc_bigwig') or True
        job.options['gdv_project'] = job.options.get('gdv_project') or False
        job.options['discard_pcr_duplicates'] = job.options.get('discard_pcr_duplicates') or False
        assembly_id = job.assembly_id
        g_rep = genrep.GenRep( gl['genrep_url'], gl.get('bwt_root'), intype=1 )
            #intype is for mapping on the genome (intype=0), exons (intype=1) or transcriptome (intype=2)
        assembly = g_rep.assembly(assembly_id)

        # Retrieve mapseq output
        mapseq_url = None
        if 'hts_mapseq' in gl:
            mapseq_url = gl['hts_mapseq']['url']

        # Program body #
        with execution(M, description=description, remote_working_directory=opt.wdir ) as ex:
            if opt.ms_limspath == "None":
                print("Alignment...")
                job = mapseq.get_fastq_files( job, ex.working_directory)
                fastq_root = os.path.abspath(ex.working_directory)
                bam_files = mapseq.map_groups(ex, job, fastq_root,
                                              assembly_or_dict=assembly, map_args=map_args)
                print("Reads aligned.")
            else:
                print("Loading BAM files...")
                (bam_files, job) = mapseq.get_bam_wig_files(ex, job, minilims=opt.ms_limspath,
                                                            hts_url=mapseq_url,
                                                            script_path=gl.get('script_path') or '',
                                                            via=opt.via )
                print("Loaded.")
            # Explicit raise so the check survives "python -O".
            if not bam_files:
                raise AssertionError("Bam files not found.")
            print("Current working directory: %s" % ex.working_directory)
            rnaseq.rnaseq_workflow(ex, job, assembly, bam_files,
                                   pileup_level=pileup_level, via=opt.via)
        # End of program body #

        # GDV
        allfiles = common.get_files(ex.id, M)
        # 'gdv_project' is always present (defaulted to False above), so
        # test its value rather than its presence.
        if job.options.get('gdv_project') and 'sql' in allfiles:
            allfiles['url'] = {job.options['gdv_project']['public_url']: 'GDV view'}
            download_url = gl['hts_rnaseq']['download']
            for k,f in allfiles['sql'].iteritems():
                gdv.add_gdv_track( gl['gdv']['key'], gl['gdv']['email'],
                                   job.options['gdv_project']['project_id'],
                                   url=download_url+str(k),
                                   name = re.sub(r'\.sql','',str(f)),
                                   gdv_url=gl['gdv']['url'] )
        print(json.dumps(allfiles))

        # E-mail
        if 'email' in gl:
            r = email.EmailReport( sender=gl['email']['sender'],
                                   to=str(job.email),
                                   subject="RNA-seq job "+str(job.description),
                                   smtp_server=gl['email']['smtp'] )
            r.appendBody('''Your RNA-seq job is finished. \n The description was: '''+str(job.description)+''' and its unique key is '''+job_key+'''. \n You can retrieve the results at this url: '''+gl['hts_rnaseq']['url']+"jobs/"+job_key+"/get_results" )
            r.send()

        sys.exit(0)
    except Usage as err:
        sys.stderr.write("%s\n" % err.msg)
        sys.stderr.write("%s\n" % usage)
        return 2
def main(argv = None):
    """Command-line entry point of the mapseq (read mapping) workflow.

    Parses the options, loads the job from the frontend (``-k``) or from a
    configuration file (``-c``), normalises the boolean/integer job
    options, then inside an execution fetches the fastq files, maps the
    reads, builds the per-group PDF statistics, optionally computes
    densities and creates a GDV project.  Afterwards it writes
    ``<key>.bed`` (UCSC track headers), ``<key>.log`` and ``<key>.done``,
    adds the sql files to the GDV project and sends a notification e-mail
    when the globals contain an ``'email'`` entry.

    :param argv: full argument vector, program name first; defaults to
        ``sys.argv``.
    :returns: ``0`` on success (and for ``--help``), ``2`` after reporting
        a ``Usage`` error.
    """
    via = "lsf"
    limspath = None
    hts_key = ''
    working_dir = None
    config_file = None
    if argv is None:
        argv = sys.argv
    try:
        try:
            opts, _ = getopt.getopt(argv[1:], "hu:k:d:w:c:",
                                    ["help", "via=", "key=", "minilims=",
                                     "working-directory=", "config="])
        except getopt.error as msg:
            raise Usage(msg)
        for o, a in opts:
            if o in ("-h", "--help"):
                print(__doc__)
                print(usage)
                return 0
            elif o in ("-u", "--via"):
                if a not in ("local", "lsf"):
                    raise Usage("Via (-u) can only be \"local\" or \"lsf\", got %s." % (a,))
                via = a
            elif o in ("-w", "--working-directory"):
                if not os.path.exists(a):
                    raise Usage("Working directory '%s' does not exist." % a)
                os.chdir(a)
                working_dir = a
            elif o in ("-d", "--minilims"):
                limspath = a
            elif o in ("-k", "--key"):
                hts_key = a
            elif o in ("-c", "--config"):
                config_file = a
            else:
                raise Usage("Unhandled option: " + o)

        # hts_key defaults to '' (never None), so test what is actually
        # usable: a key longer than one character or an existing config.
        has_key = len(hts_key) > 1
        has_config = bool(config_file) and os.path.exists(config_file)
        if not (limspath and os.path.exists(limspath)
                and (has_key or has_config)):
            raise Usage("Need a minilims and a job key or a configuration file")

        M = MiniLIMS(limspath)
        if has_key:
            gl = use_pickle(M, "global variables")
            htss = frontend.Frontend(url=gl['hts_mapseq']['url'])
            job = htss.job(hts_key)
            # Drop failed executions previously recorded under this key.
            for failed in M.search_executions(with_description=hts_key, fails=True):
                M.delete_execution(failed)
        else:
            (job, gl) = frontend.parseConfig(config_file)
            hts_key = job.description

        g_rep = genrep.GenRep(url=gl["genrep_url"], root=gl["bwt_root"],
                              intype=job.options.get('input_type_id') or 0)
        assembly = g_rep.assembly(job.assembly_id)
        if 'lims' in gl:
            dafl = dict((loc, daflims.DAFLIMS(username=gl['lims']['user'],
                                              password=pwd))
                        for loc, pwd in gl['lims']['passwd'].iteritems())
        else:
            dafl = None

        # Default the missing flags and turn string flags into booleans.
        for name, default in (('compute_densities', True),
                              ('ucsc_bigwig', True),
                              ('create_gdv_project', False)):
            if name not in job.options:
                job.options[name] = default
            elif isinstance(job.options[name], str):
                job.options[name] = job.options[name].lower() in ['1','true','t']
        # bigwig tracks are only produced when densities are computed
        job.options['ucsc_bigwig'] = job.options['ucsc_bigwig'] and job.options['compute_densities']
        if job.options.get('read_extension'):
            job.options['read_extension'] = int(job.options['read_extension'])
        if job.options.get('merge_strands'):
            job.options['merge_strands'] = int(job.options['merge_strands'])

        # The context manager closes the log even if the workflow raises.
        with open(hts_key+".log", 'w') as logfile:

            def log(text):
                """Write *text* to the log and flush immediately."""
                logfile.write(text)
                logfile.flush()

            with execution(M, description=hts_key,
                           remote_working_directory=working_dir) as ex:
                log("Enter execution, fetch fastq files.\n")
                job = get_fastq_files(job, ex.working_directory, dafl)
                log("Map reads.\n")
                mapped_files = map_groups(ex, job, ex.working_directory,
                                          assembly, {'via': via})
                log("Make stats:\n")
                for k, v in job.groups.iteritems():
                    log(str(k)+str(v['name'])+"\t")
                    add_pdf_stats(ex, mapped_files, {k: v['name']},
                                  gl.get('script_path') or '',
                                  description=set_file_descr(
                                      v['name']+"_mapping_report.pdf",
                                      groupId=k, step='stats', type='pdf'))
                if job.options['compute_densities']:
                    log("computing densities.\n")
                    # No positive extension given: use the read length
                    # reported in the first mapping's stats.
                    if not ((job.options.get('read_extension') or 0) > 0):
                        job.options['read_extension'] = mapped_files.values()[0].values()[0]['stats']['read_length']
                    densities_groups(ex, job, mapped_files,
                                     assembly.chromosomes, via=via)
                    log("Finished computing densities.\n")
                if job.options['create_gdv_project']:
                    log("Creating GDV project.\n")
                    gdv_project = gdv.create_gdv_project(
                        gl['gdv']['key'], gl['gdv']['email'],
                        job.description, assembly.nr_assembly_id,
                        gdv_url=gl['gdv']['url'], public=True)
                    # str() applies to the id only, so a numeric id works.
                    log("GDV project: "+str(gdv_project['project_id'])+"\n")
                    add_pickle(ex, gdv_project,
                               description=set_file_descr("gdv_json", step='gdv',
                                                          type='py', view='admin'))

            allfiles = get_files(ex.id, M)
            # Test the option itself, not the (always true) string literal.
            if job.options['ucsc_bigwig'] and g_rep.intype == 0:
                ucscfiles = get_files(ex.id, M, select_param={'ucsc':'1'})
                with open(hts_key+".bed", 'w') as ucscbed:
                    for ftype, fset in ucscfiles.iteritems():
                        for ffile, descr in fset.iteritems():
                            # skip files whose description has a " (...)" part
                            if re.search(r' \(.*\)', descr):
                                continue
                            ucscbed.write(track_header(descr, ftype,
                                                       gl['hts_mapseq']['download'],
                                                       ffile))
            if job.options['create_gdv_project']:
                allfiles['url'] = {gdv_project['public_url']: 'GDV view'}
                download_url = gl['hts_mapseq']['download']
                # .get(): there may be no sql files at all
                for k, f in allfiles.get('sql', {}).iteritems():
                    gdv.add_gdv_track(gl['gdv']['key'], gl['gdv']['email'],
                                      gdv_project['project_id'],
                                      url=download_url+str(k),
                                      name=re.sub(r'\.sql', '', str(f)),
                                      gdv_url=gl['gdv']['url'])

        print(json.dumps(allfiles))
        with open(hts_key+".done", 'w') as done:
            json.dump(allfiles, done)

        if 'email' in gl:
            r = email.EmailReport(sender=gl['email']['sender'],
                                  to=str(job.email),
                                  subject="Mapseq job "+str(job.description),
                                  smtp_server=gl['email']['smtp'])
            r.appendBody(''' Your mapseq job has finished. The description was: '''+str(job.description)+''' and its unique key is '''+hts_key+'''. You can now retrieve the results at this url: '''+gl['hts_mapseq']['url']+"jobs/"+hts_key+"/get_results")
            r.send()
        return 0
    except Usage as err:
        # Same reporting convention as the RNA-seq entry point.
        sys.stderr.write("%s\n" % err.msg)
        sys.stderr.write("%s\n" % usage)
        return 2
def main(argv = None): via = "lsf" limspath = None ms_limspath = "/data/htsstation/demultiplex/demultiplex_minilims" hts_key = None working_dir = None config = None if argv is None: argv = sys.argv try: try: opts,args = getopt.getopt(sys.argv[1:],"hu:k:d:w:m:c:", ["help","via","key","minilims", "working-directory","config"]) except getopt.error, msg: raise Usage(msg) for o, a in opts: if o in ("-h", "--help"): print __doc__ print usage sys.exit(0) elif o in ("-u", "--via"): if a=="local": via = "local" elif a=="lsf": via = "lsf" else: raise Usage("Via (-u) can only be \"local\" or \"lsf\", got %s." % (a,)) elif o in ("-w", "--working-directory"): if os.path.exists(a): os.chdir(a) working_dir = a else: raise Usage("Working directory '%s' does not exist." % a) elif o in ("-d", "--minilims"): limspath = a elif o in ("-k", "--key"): hts_key = a elif o in ("-c", "--config"): config_file = a else: raise Usage("Unhandled option: " + o) #temporary: test_demultiplexing.py -i /scratch/cluster/monthly/mleleu/Nicolas/NL1_NoIndex_L004_R1.fastq -p /scratch/cluster/monthly/mleleu/Nicolas/primers4C_GT_NL.fa -x 22 -n 2 -s 77 -l 30 & #job={'description':'test_demultiplex_Nico', 'options':{'opt1':'','opt2':''} , 'group': {'grpName':'grpNameNico', 'run':{'runName':'runNameNico','fastaFile':'/scratch/cluster/monthly/mleleu/Nicolas/NL1_NoIndex_L004_R1_part.fastq'}}} M = MiniLIMS( limspath ) if len(hts_key)>1: gl = use_pickle( M, "global variables" ) htss = frontend.Frontend( url=gl['hts_demultiplex']['url'] ) job = htss.job( hts_key ) elif os.path.exists(config_file): (job,gl) = frontend.parseConfig( config_file ) else: raise ValueError("Need either a job key (-k) or a configuration file (-c).") job.options['ucsc_bigwig'] = True with execution( M, description=hts_key, remote_working_directory=working_dir ) as ex: demultiplex_files = demultiplex.workflow_groups( ex, job, gl['script_path']) allfiles = common.get_files( ex.id, M ) # gdv_project = gdv.create_gdv_project( gl['gdv']['key'], 
gl['gdv']['email'], # job.description, # assembly.nr_assembly_id, # gdv_url=gl['gdv']['url'], public=True ) # if 'sql' in allfiles: # allfiles['url'] = {gdv_projec['public_url']: 'GDV view'} # download_url = gl['hts_demultiplex']['download'] # [gdv.add_gdv_track( gl['gdv']['key'], gl['gdv']['email'], # gdv_project['project_id'], # url=download_url+str(k), # name = re.sub('\.sql','',str(f)), # gdv_url=gl['gdv']['url'] ) # for k,f in allfiles['sql'].iteritems()] print json.dumps(allfiles)