Example #1
def main(argv = None):
    via = "lsf"
    limspath = None
    ms_limspath = "/data/htsstation/mapseq/mapseq_minilims"
    hts_key = ''
    working_dir = None
    config_file = None
    if argv is None:
        argv = sys.argv
    try:
        try:
            opts,args = getopt.getopt(argv[1:],"hu:k:d:w:m:c:",
                                      ["help","via=","key=","minilims=",
                                       "mapseq_minilims=",
                                       "working-directory=","config="])
        except getopt.error, msg:
            raise Usage(msg)
        for o, a in opts:
            if o in ("-h", "--help"):
                print __doc__
                print usage
                sys.exit(0)
            elif o in ("-u", "--via"):
                if a=="local":
                    via = "local"
                elif a=="lsf":
                    via = "lsf"
                else:
                    raise Usage("Via (-u) can only be \"local\" or \"lsf\", got %s." % (a,))
            elif o in ("-w", "--working-directory"):
                if os.path.exists(a):
                    os.chdir(a)
                    working_dir = a
                else:
                    raise Usage("Working directory '%s' does not exist." % a)
            elif o in ("-d", "--minilims"):
                limspath = a
            elif o in ("-m", "--mapseq_minilims"):
                ms_limspath = a
            elif o in ("-k", "--key"):
                hts_key = a
            elif o in ("-c", "--config"):
                config_file = a
            else:
                raise Usage("Unhandled option: " + o)
        if not(limspath and os.path.exists(limspath)
               and (hts_key or (config_file and os.path.exists(config_file)))):
            raise Usage("Need a minilims and a job key or a configuration file")
        M = MiniLIMS( limspath )
        if len(hts_key)>1:
            gl = use_pickle( M, "global variables" )
            htss = frontend.Frontend( url=gl['hts_4cseq']['url'] )
            job = htss.job( hts_key )
        elif os.path.exists(config_file):
            (job,gl) = frontend.parseConfig( config_file )
        else:
            raise ValueError("Need either a job key (-k) or a configuration file (-c).")
        mapseq_url = None
        if 'hts_mapseq' in gl:
            mapseq_url = gl['hts_mapseq']['url']
        job.options['ucsc_bigwig'] = True
        g_rep = genrep.GenRep( gl["genrep_url"], gl.get("bwt_root") )
        assembly = g_rep.assembly( job.assembly_id )
        primers_file='/scratch/cluster/monthly/htsstation/4cseq/'+str(job.id)+'/primers.fa'
        primers_dict=c4seq.loadPrimers(primers_file)
        with execution( M, description=hts_key, remote_working_directory=working_dir ) as ex:
            (mapseq_files, job) = mapseq.get_bam_wig_files( ex, job, ms_limspath, mapseq_url, suffix=['merged'],script_path=gl['script_path'], via=via )
            c4seq_files = c4seq.workflow_groups( ex, job, primers_dict, g_rep,
                                           mapseq_files, mapseq_url,
                                           gl['script_path'])

        ucscfiles = common.get_files( ex.id, M, select_param={'ucsc':'1'} )
        with open(hts_key+".bed",'w') as ucscbed:
            for ftype,fset in ucscfiles.iteritems():
                for ffile,descr in fset.iteritems():
                    ucscbed.write(common.track_header(descr,ftype,gl['hts_4cseq']['download'],ffile))

        allfiles = common.get_files( ex.id, M )
#        gdv_project = gdv.create_gdv_project( gl['gdv']['key'], gl['gdv']['email'],
#                                              job.description,
#                                              assembly.nr_assembly_id,
#                                              gdv_url=gl['gdv']['url'], public=True )
#        add_pickle( ex, gdv_project, description='py:gdv_json' )
#        if 'sql' in allfiles:
#            allfiles['url'] = {gdv_project['public_url']: 'GDV view'}
#            download_url = gl['hts_4cseq']['download']
#            [gdv.add_gdv_track( gl['gdv']['key'], gl['gdv']['email'],
#                                gdv_project['project_id'],
#                                url=download_url+str(k),
#                                name = re.sub('\.sql','',str(f)),
#                                gdv_url=gl['gdv']['url'])
#             for k,f in allfiles['sql'].iteritems()]

        print json.dumps(allfiles)
        with open(hts_key+".done",'w') as done:
            json.dump(allfiles,done)


        if 'email' in gl:
            r = email.EmailReport( sender=gl['email']['sender'],
                                   to=str(job.email),
                                   subject="4cseq job "+str(job.description),
                                   smtp_server=gl['email']['smtp'] )
            r.appendBody('''
Your 4C-seq job is finished.

The description was:
'''+str(job.description)+'''
and its unique key is '''+hts_key+'''.

You can retrieve the results at this url:
'''+gl['hts_4cseq']['url']+"jobs/"+hts_key+"/get_results")
            r.send()
        sys.exit(0)
    except Usage, err:
        print >>sys.stderr, err.msg
        print >>sys.stderr, usage
        return 2
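
All of these snippets assume the same Python 2 scaffolding: the standard-library imports, the bbcflib/bein helpers they call, a module-level usage string, and a small Usage exception carrying a msg attribute (the idiom from the getopt example in the Python 2 docs). Below is a minimal sketch of that assumed scaffolding; the third-party import paths are inferred from the identifiers used in the code, not verified against bbcflib itself:

import sys, os, re, json, getopt, optparse, shutil

# Inferred, unverified import paths for the third-party names used in these examples:
# from bein import MiniLIMS, execution
# from bbcflib import frontend, genrep, mapseq, daflims, gdv, email, common
# from bbcflib.common import use_pickle, get_files, set_file_descr, track_header

class Usage(Exception):
    """Signals bad command-line input; main() prints .msg and returns 2."""
    def __init__(self, msg):
        self.msg = msg

usage = "script.py [-h] [-u via] [-k key] [-d minilims] [-w wdir] [-c config]"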
Example #2
def main(argv = None):
    via = "lsf"
    limspath = None
    ms_limspath = "/data/htsstation/mapseq/mapseq_minilims"
    hts_key = ''
    working_dir = None
    config_file = None
    if argv is None:
        argv = sys.argv
    try:
        try:
            opts,args = getopt.getopt(argv[1:],"hu:k:d:w:m:c:",
                                      ["help","via=","key=","minilims=",
                                       "mapseq_minilims=",
                                       "working-directory=","config="])
        except getopt.error, msg:
            raise Usage(msg)
        for o, a in opts:
            if o in ("-h", "--help"):
                print __doc__
                print usage
                return 0
            elif o in ("-u", "--via"):
                if a=="local":
                    via = "local"
                elif a=="lsf":
                    via = "lsf"
                else:
                    raise Usage("Via (-u) can only be \"local\" or \"lsf\", got %s." % (a,))
            elif o in ("-w", "--working-directory"):
                if os.path.exists(a):
                    os.chdir(a)
                    working_dir = a
                else:
                    raise Usage("Working directory '%s' does not exist." % a)
            elif o in ("-d", "--minilims"):
                limspath = a
            elif o in ("-m", "--mapseq_minilims"):
                ms_limspath = a
            elif o in ("-k", "--key"):
                hts_key = a
            elif o in ("-c", "--config"):
                config_file = a
            else:
                raise Usage("Unhandled option: " + o)
        if not(limspath and os.path.exists(limspath)
               and (hts_key or (config_file and os.path.exists(config_file)))):
            raise Usage("Need a minilims and a job key or a configuration file")
        M = MiniLIMS( limspath )
        if len(hts_key)>1:
            gl = use_pickle( M, "global variables" )
            htss = frontend.Frontend( url=gl['hts_chipseq']['url'] )
            job = htss.job( hts_key )
            [M.delete_execution(x) for x in M.search_executions(with_description=hts_key,fails=True)]
        elif os.path.exists(config_file):
            (job,gl) = frontend.parseConfig( config_file )
            hts_key = job.description
        else:
            raise ValueError("Need either a job key (-k) or a configuration file (-c).")
        mapseq_url = None
        if 'hts_mapseq' in gl:
            mapseq_url = gl['hts_mapseq']['url']
        job.options['ucsc_bigwig'] = True
        g_rep = genrep.GenRep( gl["genrep_url"], gl.get("bwt_root") )
        assembly = g_rep.assembly( job.assembly_id )
        logfile = open(hts_key+".log",'w')
        with execution( M, description=hts_key, remote_working_directory=working_dir ) as ex:
            logfile.write("Enter execution, fetch bam and wig files.\n");logfile.flush()
            (mapped_files, job) = mapseq.get_bam_wig_files( ex, job, minilims=ms_limspath, hts_url=mapseq_url,
                                                     script_path=gl.get('script_path') or '', via=via )
            logfile.write("Starting workflow.\n");logfile.flush()
            chipseq_files = workflow_groups( ex, job, mapped_files, assembly.chromosomes,
                                             gl.get('script_path') or '', g_rep, logfile=logfile, via=via )

        allfiles = get_files( ex.id, M )
        if 'gdv_project' in job.options and 'sql' in allfiles:
            logfile.write("Adding to GDV project.\n");logfile.flush()
            allfiles['url'] = {job.options['gdv_project']['public_url']: 'GDV view'}
            download_url = gl['hts_chipseq']['download']
            [gdv.add_gdv_track( gl['gdv']['key'], gl['gdv']['email'],
                                job.options['gdv_project']['project_id'],
                                url=download_url+str(k),
                                name = re.sub('\.sql','',str(f)),
                                gdv_url=gl['gdv']['url'] )
             for k,f in allfiles['sql'].iteritems()]
        logfile.close()
        print json.dumps(allfiles)
        with open(hts_key+".done",'w') as done:
            json.dump(allfiles,done)
        if 'email' in gl:
            r = email.EmailReport( sender=gl['email']['sender'],
                                   to=str(job.email),
                                   subject="Chipseq job "+str(job.description),
                                   smtp_server=gl['email']['smtp'] )
            r.appendBody('''
Your chip-seq job has finished.

The description was:
'''+str(job.description)+'''
and its unique key is '''+hts_key+'''.

You can retrieve the results at this url:
'''+gl['hts_chipseq']['url']+"jobs/"+hts_key+"/get_results")
            r.send()
        return 0
    except Usage, err:
        print >>sys.stderr, err.msg
        print >>sys.stderr, usage
        return 2
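
Several of these examples use a bracketed list comprehension purely for its side effects, as in the delete_execution and add_gdv_track calls above. A plain for-loop is equivalent and usually clearer; a runnable sketch of the difference, with the mapping back to the snippet noted in a comment:

# Generic illustration: side-effect list comprehension vs. the equivalent loop.
items = [1, 2, 3]
log = []
[log.append(x) for x in items]   # works, but builds a throwaway list of Nones
for x in items:                  # clearer: no pretend return value
    log.append(x)
# The same rewrite applies to, e.g.:
#   [M.delete_execution(x) for x in M.search_executions(with_description=hts_key, fails=True)]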
Example #3
def main(argv=None):
    parser = None
    try:
        parser = optparse.OptionParser(usage=usage, description=description)
        for opt in opts:
            parser.add_option(opt[0],opt[1],help=opt[2],**opt[3])
        (options, args) = parser.parse_args()
        if options.minilims in module_list:
            M = MiniLIMS(os.path.join(options.basepath,options.minilims+"_minilims"))
        elif os.path.exists(options.minilims):
            M = MiniLIMS(options.minilims)
        else:
            raise Usage("Minilims not found, please specify a path or a module with -m.")
        tags = options.tag
        if tags:
            tags = tags.split(",")
            if tags[0].count("="):
                tags = dict(x.split("=") for x in tags)
            elif tags[0].count(":"):
                tags = dict(x.split(":") for x in tags)
        if options.gdv:
            if tags: tags['type'] = 'sql'
            else: tags = {'type': 'sql'}
        if options.programs:
            if isinstance(options.execution, basestring):
                exlist = M.search_executions(with_description=options.execution)
                if len(exlist) == 0:
                    exlist = M.search_executions(with_text=options.execution)
                if len(exlist) == 0:
                    raise Usage("Execution with key %s not found in %s." %(options.execution,options.minilims))
                exid = max(exlist)
            else:
                exid = int(options.execution or 0)
            exec_data = M.fetch_execution(exid)['programs']
            outfile = options.output and open(options.output,"w") or sys.stdout
            for prog in exec_data:
                pargs = prog['arguments']
                if tags and all([t not in x for x in pargs for t in tags]):
                    continue
                stout = prog['stdout'].encode('ascii','ignore')
                sterr = prog['stderr'].encode('ascii','ignore')
                if pargs[0] == 'bsub': pargs = str(pargs[-1])
                else: pargs = str(" ".join(pargs))
                outfile.write("\n".join([pargs,stout,'',sterr,'','-'*40,'']))
            if options.output: outfile.close()
            return 0
        files = get_files(options.execution,M,select_param=tags)
        fprefix = ''
        if options.list:
            if options.output and os.path.isdir(options.output):
                options.output = os.path.join(options.output,options.execution+".txt")
            outfile = options.output and open(options.output,"w") or sys.stdout
            outfile.write("\t".join(["type","group","name","path","comment"])+"\n")
        else:
            if not(options.output): options.output = "./"
            if not(os.path.isdir(options.output)):
                options.output, fprefix = os.path.split(options.output)
        if options.gdv:
            gdvpaths = []
            gdvnames = []
        for t in sorted(files.keys()):
            for k,v in files[t].iteritems():
                fpath = os.path.join(M.file_path,k)
                vv = v.split("[")
                fname = fprefix+vv.pop(0)
                comment = ''
                par_dict = {}
                if vv:
                    vv = vv[0].split("]")
                    par_dict = dict(x.split(":") for x in vv.pop(0).split(","))
                    if vv: comment = vv[0].strip().strip("()")
                if not(options.admin) and par_dict.get('view') == 'admin': continue
                if options.list: outfile.write("\t".join([t,par_dict.get('groupId',''),fname,fpath,comment])+"\n")
                if options.copy: shutil.copy(fpath, os.path.join(options.output,fname))
                if options.symlink: os.symlink(fpath, os.path.join(options.output,fname))
                if options.gdv:
                    gdvpaths.append(fpath)
                    gdvnames.append(re.sub('\.sql.*','',str(fname)))
        if options.list and options.output: outfile.close()
        if options.gdv:
            from bbcflib import gdv
            gdvurl = options.gdvurl or gdv.default_url
            gdvproject = gdv.get_project(mail=options.email, key=options.key,
                                         project_key=options.gdv)
            if gdvproject.get('project',{}).get('id',0)>0:
                try:
                    tr = gdv.multiple_tracks( mail=options.email, key=options.key,
                                              project_id=gdvproject['project']['id'],
                                              urls=gdvpaths, names=gdvnames, extensions=['sql']*len(gdvpaths),
                                              force=True, serv_url=gdvurl )
                except Exception, err:
                    raise Usage("GDV Tracks Failed: %s\n" %err)
                print """
*********** GDV project at: ***********
%s/public/project?k=%s&id=%s
***************************************
""" %(gdvurl,gdvproject['project']['download_key'],gdvproject['project']['id'])
        return 0
    except Usage, err:
        print >>sys.stderr, err.msg
        if parser: parser.print_help()
        return 2
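
The --tag option accepts comma-separated key=value or key:value pairs, which the code above turns into a dict. A quick illustration with a hypothetical input, using the same split logic:

tags = "type=sql,groupId=3".split(",")
if tags[0].count("="):
    tags = dict(x.split("=") for x in tags)
elif tags[0].count(":"):
    tags = dict(x.split(":") for x in tags)
print tags   # {'groupId': '3', 'type': 'sql'} (key order may vary)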
Example #4
    def __call__(self,opts):
        self.opts = opts
        if os.path.exists(self.opts.wdir):
            os.chdir(self.opts.wdir)
        else:
            raise Usage("Working directory '%s' does not exist." %self.opts.wdir)

##### Connect to Minilims, recover global variables, fetch job info
        self.minilims = os.path.join(self.opts.basepath,self.name+"_minilims")
        M = MiniLIMS(self.minilims)
        if not(self.opts.key or (self.opts.config and os.path.exists(self.opts.config))):
            raise Usage("Need a job key or a configuration file")
        if self.opts.key:
            self.globals = use_pickle(M, "global variables")
            htss = frontend.Frontend( url=self.globals['hts_mapseq']['url'] )
            self.job = htss.job( self.opts.key )
            [M.delete_execution(x) for x in \
                 M.search_executions(with_description=self.opts.key,fails=True)]
            if self.job.options.get("config_file"):
                if os.path.exists(self.job.options["config_file"]):
                    self.opts.config = os.path.abspath(self.job.options["config_file"])
                elif os.path.exists("config.txt"):
                    self.opts.config = os.path.abspath("config.txt")
            if self.opts.config and os.path.exists(self.opts.config):
                (self.job,self.globals) = frontend.parseConfig( self.opts.config, self.job, self.globals )
        elif os.path.exists(self.opts.config):
            (self.job,self.globals) = frontend.parseConfig( self.opts.config )
            self.opts.key = self.job.description
        else:
            raise Usage("Need either a job key (-k) or a configuration file (-c).")
##### Genrep instance
        if 'fasta_file' in self.job.options:
            if os.path.exists(self.job.options['fasta_file']):
                self.job.options['fasta_file'] = os.path.abspath(self.job.options['fasta_file'])
            else:
                for ext in (".fa",".fa.gz",".tar.gz"):
                    if os.path.exists("ref_sequence"+ext):
                        self.job.options['fasta_file'] = os.path.abspath("ref_sequence"+ext)
            if not os.path.exists(self.job.options['fasta_file']):
                raise Usage("Don't know where to find fasta file %s." %self.job.options["fasta_file"])
        g_rep = genrep.GenRep( url=self.globals.get("genrep_url"),
                               root=self.globals.get("bwt_root") )
##### Configure facility LIMS
        if 'lims' in self.globals:
            from bbcflib import daflims
            self.job.dafl = dict((loc,daflims.DAFLIMS( username=self.globals['lims']['user'],
                                                       password=pwd ))
                                 for loc,pwd in self.globals['lims']['passwd'].iteritems())
########################################################################
##########################  EXECUTION  #################################
########################################################################
##### Logging
        logfile_name = os.path.abspath(self.opts.key+".log")
        debugfile_name = os.path.abspath(self.opts.key+".debug")
        self.logfile = open(logfile_name,'w')
        self.debugfile = open(debugfile_name,'w')
        self.debug_write(json.dumps(self.globals)+"\n")
        with execution( M, description=self.opts.key,
                        remote_working_directory=self.opts.wdir ) as ex:
            self.log_write("Enter execution. Current working directory: %s" %ex.working_directory)
            self.job.assembly = genrep.Assembly( assembly=self.job.assembly_id,
                                                 genrep=g_rep,
                                                 fasta=self.job.options.get('fasta_file'),
                                                 annot=self.job.options.get('annot_file'),
                                                 intype=self.job.options.get('input_type_id',0),
                                                 ex=ex, via=self.opts.via,
                                                 bowtie2=self.job.options.get("bowtie2",True) )
##### Check all the options
            if not self.check_options():
                raise Usage("Problem with options %s" %self.opts)
            self.debug_write(json.dumps(self.job.options))
            self.init_files( ex )
##### Run workflow
            self.log_write("Starting workflow.")
            self.main_func(ex,**self.main_args)
##### Add logs to the LIMS in admin mode
            self.logfile.flush()
            self.debugfile.flush()
            log_desc = set_file_descr('logfile.txt', step='log', type='txt', view="admin")
            debug_desc = set_file_descr('debug.txt', step='log', type='txt', view="admin")
            ex.add(logfile_name, description=log_desc)
            ex.add(debugfile_name, description=debug_desc)
##### Create GDV project
            if self.job.options['create_gdv_project']: self.gdv_create(ex)

########################################################################
########################  POSTPROCESSING  ##############################
########################################################################
        allfiles = get_files( ex.id, M )
        if self.job.options['create_gdv_project'] and \
                self.job.options['gdv_project'].get('project',{}).get('id',0)>0:
            allfiles['url'] = self.gdv_upload(allfiles.get('sql',{}))
        self.logfile.close()
        self.debugfile.close()
        print json.dumps(allfiles)
        with open(self.opts.key+".done",'w') as done: json.dump(allfiles,done)
        self.send_email()
        return 0
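
Defining __call__ makes the workflow object itself invocable once constructed, so a driver script can treat it like a function. A toy illustration of the pattern; the class name and option dict here are made up for demonstration:

class Workflow(object):
    def __init__(self, name):
        self.name = name
    def __call__(self, opts):
        # the real method above chdirs, opens the MiniLIMS and runs the job
        print "running %s in %s" % (self.name, opts['wdir'])

wf = Workflow("mapseq")
wf({'wdir': '.'})   # prints: running mapseq in .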
Example #5
def main():
    map_args = None # {'bwt_args':["-n",str(3),"-p",str(4),"-d",str(50),"--chunkmbs",str(1024),"-m",str(5)]}

    opts = (("-v", "--via", "Run executions using method 'via' (can be 'local' or 'lsf')", {'default': "lsf"}),
            ("-k", "--key", "Alphanumeric key of the new RNA-seq job", {'default': None}),
            ("-d", "--minilims", "MiniLIMS where RNAseq executions and files will be stored.", {'default': None}),
            ("-m", "--mapseq-minilims", "MiniLIMS where a previous Mapseq execution and files has been stored. \
                                     Set it to None to align de novo from read files.",
                                     {'default': "/data/htsstation/mapseq/mapseq_minilims", 'dest':"ms_limspath"}),
            ("-w", "--working-directory", "Create execution working directories in wdir",
                                     {'default': os.getcwd(), 'dest':"wdir"}),
            ("-c", "--config", "Config file", {'default': None}),
            ("-p", "--pileup_level", "Target features, inside of quotes, separated by commas.\
                                     E.g. 'genes,exons,transcripts'",{'default': "genes,exons,transcripts"}))
    try:
        usage = "run_rnaseq.py [OPTIONS]"
        desc = """A High-throughput RNA-seq analysis workflow. It returns a file containing
                  a column of transcript counts for each given BAM file, normalized using DESeq's
                  size factors. """
        parser = optparse.OptionParser(usage=usage, description=desc)
        for opt in opts:
            parser.add_option(opt[0],opt[1],help=opt[2],**opt[3])
        (opt, args) = parser.parse_args()

        if os.path.exists(opt.wdir): os.chdir(opt.wdir)
        else: parser.error("Working directory '%s' does not exist." % opt.wdir)
        if not opt.minilims: parser.error("Must specify a MiniLIMS to attach to")

        # Rna-seq job configuration
        M = MiniLIMS(opt.minilims)
        if opt.key:
            gl = use_pickle( M, "global variables" )
            htss = frontend.Frontend( url=gl['hts_rnaseq']['url'] )
            job = htss.job(opt.key) # new *RNA-seq* job instance
            pileup_level = opt.pileup_level.split(',')
            [M.delete_execution(x) for x in M.search_executions(with_description=opt.key,fails=True)]
            description = "Job run with mapseq key %s" % opt.key
        elif os.path.exists(opt.config):
            pileup_level = opt.pileup_level.split(',')
            (job,gl) = frontend.parseConfig(opt.config)
            description = "Job run with config file %s" % opt.config
            opt.key = str(job.description)
        else: raise ValueError("Need either a job key (-k) or a configuration file (-c).")

        job.options['ucsc_bigwig'] = job.options.get('ucsc_bigwig', True)
        job.options['gdv_project'] = job.options.get('gdv_project', False)
        job.options['discard_pcr_duplicates'] = job.options.get('discard_pcr_duplicates', False)
        assembly_id = job.assembly_id
        g_rep = genrep.GenRep( gl['genrep_url'], gl.get('bwt_root'), intype=1 )
        # intype selects the mapping target: genome (0), exons (1) or transcriptome (2)
        assembly = g_rep.assembly(assembly_id)

        # Retrieve mapseq output
        mapseq_url = None
        if 'hts_mapseq' in gl: mapseq_url = gl['hts_mapseq']['url']

        # Program body #
        with execution(M, description=description, remote_working_directory=opt.wdir ) as ex:
            if opt.ms_limspath == "None":
                print "Alignment..."
                job = mapseq.get_fastq_files( job, ex.working_directory)
                fastq_root = os.path.abspath(ex.working_directory)
                bam_files = mapseq.map_groups(ex, job, fastq_root, assembly_or_dict=assembly, map_args=map_args)
                print "Reads aligned."
            else:
                print "Loading BAM files..."
                (bam_files, job) = mapseq.get_bam_wig_files(ex, job, minilims=opt.ms_limspath, hts_url=mapseq_url,
                                                            script_path=gl.get('script_path') or '', via=opt.via )
                print "Loaded."
            assert bam_files, "Bam files not found."
            print "Current working directory:", ex.working_directory
            rnaseq.rnaseq_workflow(ex, job, assembly, bam_files, pileup_level=pileup_level, via=opt.via)
        # End of program body #

        # GDV
        allfiles = common.get_files(ex.id, M)
        if job.options.get('gdv_project') and 'sql' in allfiles:
            allfiles['url'] = {job.options['gdv_project']['public_url']: 'GDV view'}
            download_url = gl['hts_rnaseq']['download']
            [gdv.add_gdv_track( gl['gdv']['key'], gl['gdv']['email'],
                                job.options['gdv_project']['project_id'],
                                url=download_url+str(k),
                                name = re.sub('\.sql','',str(f)),
                                gdv_url=gl['gdv']['url'] )
             for k,f in allfiles['sql'].iteritems()]
        print json.dumps(allfiles)

        # E-mail
        if 'email' in gl:
            r = email.EmailReport( sender=gl['email']['sender'],
                                   to=str(job.email),
                                   subject="RNA-seq job "+str(job.description),
                                   smtp_server=gl['email']['smtp'] )
            r.appendBody('''Your RNA-seq job is finished.
                \n The description was: '''+str(job.description)+''' and its unique key is '''+opt.key+'''.
                \n You can retrieve the results at this url: '''+gl['hts_rnaseq']['url']+"jobs/"+opt.key+"/get_results" )
            r.send()

        sys.exit(0)
    except Usage, err:
        print >>sys.stderr, err.msg
        print >>sys.stderr, usage
        return 2
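
Each entry in the opts tuple above is (short flag, long flag, help text, extra kwargs), splatted into optparse by the add_option loop. A minimal illustration of how one tuple expands:

import optparse
parser = optparse.OptionParser()
opt = ("-k", "--key", "Alphanumeric key of the new RNA-seq job", {'default': None})
parser.add_option(opt[0], opt[1], help=opt[2], **opt[3])
# equivalent to:
# parser.add_option("-k", "--key", help="Alphanumeric key of the new RNA-seq job", default=None)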
Example #6
def main(argv = None):
    via = "lsf"
    limspath = None
    hts_key = ''
    working_dir = None
    config_file = None
    if argv is None:
        argv = sys.argv
    try:
        try:
            opts,args = getopt.getopt(argv[1:],"hu:k:d:w:c:",
                                      ["help","via=","key=","minilims=",
                                       "working-directory=","config="])
        except getopt.error, msg:
            raise Usage(msg)
        for o, a in opts:
            if o in ("-h", "--help"):
                print __doc__
                print usage
                return 0
            elif o in ("-u", "--via"):
                if a=="local":
                    via = "local"
                elif a=="lsf":
                    via = "lsf"
                else:
                    raise Usage("Via (-u) can only be \"local\" or \"lsf\", got %s." % (a,))
            elif o in ("-w", "--working-directory"):
                if os.path.exists(a):
                    os.chdir(a)
                    working_dir = a
                else:
                    raise Usage("Working directory '%s' does not exist." % a)
            elif o in ("-d", "--minilims"):
                limspath = a
            elif o in ("-k", "--key"):
                hts_key = a
            elif o in ("-c", "--config"):
                config_file = a
            else:
                raise Usage("Unhandled option: " + o)
        if not(limspath and os.path.exists(limspath)
               and (hts_key or (config_file and os.path.exists(config_file)))):
            raise Usage("Need a minilims and a job key or a configuration file")
        M = MiniLIMS( limspath )
        if len(hts_key)>1:
            gl = use_pickle(M, "global variables")
            htss = frontend.Frontend( url=gl['hts_mapseq']['url'] )
            job = htss.job( hts_key )
            [M.delete_execution(x) for x in M.search_executions(with_description=hts_key,fails=True)]
        elif os.path.exists(config_file):
            (job,gl) = frontend.parseConfig( config_file )
            hts_key = job.description
        else:
            raise ValueError("Need either a job key (-k) or a configuration file (-c).")
        g_rep = genrep.GenRep( url=gl["genrep_url"], root=gl["bwt_root"],
                               intype=job.options.get('input_type_id') or 0 )
        assembly = g_rep.assembly( job.assembly_id )
        if 'lims' in gl:
            dafl = dict((loc,daflims.DAFLIMS( username=gl['lims']['user'], password=pwd ))
                        for loc,pwd in gl['lims']['passwd'].iteritems())
        else:
            dafl = None
        if 'compute_densities' not in job.options:
            job.options['compute_densities'] = True
        elif isinstance(job.options['compute_densities'],str):
            job.options['compute_densities'] = job.options['compute_densities'].lower() in ['1','true','t']
        if 'ucsc_bigwig' not in job.options:
            job.options['ucsc_bigwig'] = True
        elif isinstance(job.options['ucsc_bigwig'],str):
            job.options['ucsc_bigwig'] = job.options['ucsc_bigwig'].lower() in ['1','true','t']
        job.options['ucsc_bigwig'] = job.options['ucsc_bigwig'] and job.options['compute_densities']
        if 'create_gdv_project' not in job.options:
            job.options['create_gdv_project'] = False
        elif isinstance(job.options['create_gdv_project'],str):
            job.options['create_gdv_project'] = job.options['create_gdv_project'].lower() in ['1','true','t']
        if job.options.get('read_extension'):
            job.options['read_extension'] = int(job.options['read_extension'])
        if job.options.get('merge_strands'):
            job.options['merge_strands'] = int(job.options['merge_strands'])
        logfile = open(hts_key+".log",'w')
        with execution( M, description=hts_key, remote_working_directory=working_dir ) as ex:
            logfile.write("Enter execution, fetch fastq files.\n");logfile.flush()
            job = get_fastq_files( job, ex.working_directory, dafl )
            logfile.write("Map reads.\n");logfile.flush()
            mapped_files = map_groups( ex, job, ex.working_directory, assembly, {'via': via} )
            logfile.write("Make stats:\n");logfile.flush()
            for k,v in job.groups.iteritems():
                logfile.write(str(k)+str(v['name'])+"\t");logfile.flush()
                pdf = add_pdf_stats( ex, mapped_files,
                                     {k:v['name']},
                                     gl.get('script_path') or '',
                                     description=set_file_descr(v['name']+"_mapping_report.pdf",groupId=k,step='stats',type='pdf') )
            if job.options['compute_densities']:
                logfile.write("computing densities.\n");logfile.flush()
                if not(job.options.get('read_extension')>0):
                    job.options['read_extension'] = mapped_files.values()[0].values()[0]['stats']['read_length']
                density_files = densities_groups( ex, job, mapped_files, assembly.chromosomes, via=via )
                logfile.write("Finished computing densities.\n");logfile.flush()
                if job.options['create_gdv_project']:
                    logfile.write("Creating GDV project.\n");logfile.flush()
                    gdv_project = gdv.create_gdv_project( gl['gdv']['key'], gl['gdv']['email'],
                                                          job.description,
                                                          assembly.nr_assembly_id,
                                                          gdv_url=gl['gdv']['url'], public=True )
                    logfile.write("GDV project: "+str(gdv_project['project_id']+"\n"));logfile.flush()
                    add_pickle( ex, gdv_project, description=set_file_descr("gdv_json",step='gdv',type='py',view='admin') )
        allfiles = get_files( ex.id, M )
        if job.options['ucsc_bigwig'] and g_rep.intype == 0:
            ucscfiles = get_files( ex.id, M, select_param={'ucsc':'1'} )
            with open(hts_key+".bed",'w') as ucscbed:
                for ftype,fset in ucscfiles.iteritems():
                    for ffile,descr in fset.iteritems():
                        if re.search(r' \(.*\)',descr): continue
                        ucscbed.write(track_header(descr,ftype,gl['hts_mapseq']['download'],ffile))
        if job.options['create_gdv_project']:
            allfiles['url'] = {gdv_project['public_url']: 'GDV view'}
            download_url = gl['hts_mapseq']['download']
            [gdv.add_gdv_track( gl['gdv']['key'], gl['gdv']['email'],
                                gdv_project['project_id'],
                                url=download_url+str(k),
                                name = re.sub('\.sql','',str(f)),
                                gdv_url=gl['gdv']['url'] )
             for k,f in allfiles['sql'].iteritems()]
        logfile.close()
        print json.dumps(allfiles)
        with open(hts_key+".done",'w') as done:
            json.dump(allfiles,done)
        if 'email' in gl:
            r = email.EmailReport( sender=gl['email']['sender'],
                                   to=str(job.email),
                                   subject="Mapseq job "+str(job.description),
                                   smtp_server=gl['email']['smtp'] )
            r.appendBody('''
Your mapseq job has finished.

The description was:
'''+str(job.description)+'''
and its unique key is '''+hts_key+'''.

You can now retrieve the results at this url:
'''+gl['hts_mapseq']['url']+"jobs/"+hts_key+"/get_results")
            r.send()
        return 0
    except Usage, err:
        print >>sys.stderr, err.msg
        print >>sys.stderr, usage
        return 2
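
The same string-to-boolean coercion is spelled out three times above; a small helper captures the convention. This is a sketch only, not part of bbcflib:

def as_bool(value, default=False):
    # Frontend options may arrive as strings like '1', 'true' or 't'.
    if value is None:
        return default
    if isinstance(value, str):
        return value.lower() in ['1', 'true', 't']
    return bool(value)

# e.g.: job.options['compute_densities'] = as_bool(job.options.get('compute_densities'), True)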
Example #7
def main(argv = None):
    via = "lsf"
    limspath = None
    ms_limspath = "/data/htsstation/demultiplex/demultiplex_minilims"
    hts_key = ''
    working_dir = None
    config_file = None
    if argv is None:
        argv = sys.argv
    try:
        try:
            opts,args = getopt.getopt(argv[1:],"hu:k:d:w:m:c:",
                                      ["help","via=","key=","minilims=",
                                       "working-directory=","config="])
        except getopt.error, msg:
            raise Usage(msg)
        for o, a in opts:
            if o in ("-h", "--help"):
                print __doc__
                print usage
                sys.exit(0)
            elif o in ("-u", "--via"):
                if a=="local":
                    via = "local"
                elif a=="lsf":
                    via = "lsf"
                else:
                    raise Usage("Via (-u) can only be \"local\" or \"lsf\", got %s." % (a,))
            elif o in ("-w", "--working-directory"):
                if os.path.exists(a):
                    os.chdir(a)
                    working_dir = a
                else:
                    raise Usage("Working directory '%s' does not exist." % a)
            elif o in ("-d", "--minilims"):
                limspath = a
            elif o in ("-k", "--key"):
                hts_key = a
            elif o in ("-c", "--config"):
                config_file = a
            else:
                raise Usage("Unhandled option: " + o)
        #temporary:  test_demultiplexing.py -i /scratch/cluster/monthly/mleleu/Nicolas/NL1_NoIndex_L004_R1.fastq -p /scratch/cluster/monthly/mleleu/Nicolas/primers4C_GT_NL.fa -x 22 -n 2 -s 77 -l 30 &
        #job={'description':'test_demultiplex_Nico', 'options':{'opt1':'','opt2':''} , 'group': {'grpName':'grpNameNico', 'run':{'runName':'runNameNico','fastaFile':'/scratch/cluster/monthly/mleleu/Nicolas/NL1_NoIndex_L004_R1_part.fastq'}}}
        if not(limspath and os.path.exists(limspath)
               and (hts_key or (config_file and os.path.exists(config_file)))):
            raise Usage("Need a minilims and a job key or a configuration file")
        M = MiniLIMS( limspath )
        if len(hts_key)>1:
            gl = use_pickle( M, "global variables" )
            htss = frontend.Frontend( url=gl['hts_demultiplex']['url'] )
            job = htss.job( hts_key )
        elif os.path.exists(config_file):
            (job,gl) = frontend.parseConfig( config_file )
        else:
            raise ValueError("Need either a job key (-k) or a configuration file (-c).")
        job.options['ucsc_bigwig'] = True
        with execution( M, description=hts_key, remote_working_directory=working_dir ) as ex:
            demultiplex_files = demultiplex.workflow_groups( ex, job, gl['script_path'])
        allfiles = common.get_files( ex.id, M )

#        gdv_project = gdv.create_gdv_project( gl['gdv']['key'], gl['gdv']['email'],
#                                              job.description,
#                                              assembly.nr_assembly_id,
#                                              gdv_url=gl['gdv']['url'], public=True )
#        if 'sql' in allfiles:
#            allfiles['url'] = {gdv_project['public_url']: 'GDV view'}
#            download_url = gl['hts_demultiplex']['download']
#            [gdv.add_gdv_track( gl['gdv']['key'], gl['gdv']['email'],
#                                gdv_project['project_id'],
#                                url=download_url+str(k),
#                                name = re.sub('\.sql','',str(f)),
#                                gdv_url=gl['gdv']['url'] )
#             for k,f in allfiles['sql'].iteritems()]
        print json.dumps(allfiles)
    except Usage, err:
        print >>sys.stderr, err.msg
        print >>sys.stderr, usage
        return 2